def test_constant_schedule():
    schedule = ConstantSchedule(0.3)

    # make sure the values in the constant schedule don't change over time
    for i in range(1000):
        assert schedule.initial_value == 0.3
        assert schedule.current_value == 0.3
        schedule.step()
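# For reference, a minimal sketch of the Schedule interface this test exercises
# (initial_value, current_value, step()); the real rl_coach class may differ in
# detail, so treat this as an illustration rather than the library implementation.
class MinimalConstantSchedule:
    def __init__(self, value: float):
        self.initial_value = value  # the value the schedule starts from
        self.current_value = value  # the value at the current step

    def step(self):
        # a constant schedule ignores time, so stepping is a no-op
        pass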
def __init__(self):
    super().__init__(algorithm=NECAlgorithmParameters(),
                     exploration=EGreedyParameters(),
                     memory=NECMemoryParameters(),
                     networks={"main": NECNetworkParameters()})
    self.exploration.epsilon_schedule = ConstantSchedule(0.1)
    self.exploration.evaluation_epsilon = 0.01
def __init__(self, max_size: Tuple[MemoryGranularity, int], alpha: float = 0.6,
             beta: Schedule = ConstantSchedule(0.4), epsilon: float = 1e-6,
             allow_duplicates_in_batch_sampling: bool = True):
    """
    :param max_size: the maximum number of transitions or episodes to hold in the memory
    :param alpha: the alpha prioritization coefficient
    :param beta: the beta parameter used for importance sampling
    :param epsilon: a small value added to the priority of each transition
    :param allow_duplicates_in_batch_sampling: allow having the same transition multiple times in a batch
    """
    if max_size[0] != MemoryGranularity.Transitions:
        raise ValueError("Prioritized Experience Replay currently only supports setting the memory size "
                         "in transitions granularity.")
    self.power_of_2_size = 1
    while self.power_of_2_size < max_size[1]:
        self.power_of_2_size *= 2
    super().__init__((MemoryGranularity.Transitions, self.power_of_2_size),
                     allow_duplicates_in_batch_sampling)
    self.sum_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.SUM)
    self.min_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MIN)
    self.max_tree = SegmentTree(self.power_of_2_size, SegmentTree.Operation.MAX)
    self.alpha = alpha
    self.beta = beta
    self.epsilon = epsilon
    self.maximal_priority = 1.0
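# Why beta is a Schedule: PER corrects its non-uniform sampling with importance
# weights w_i = (N * P(i)) ** -beta (Schaul et al., 2016), and beta is typically
# annealed toward 1 as training converges. A self-contained sketch of that
# correction (not the class's own sampling code):
import numpy as np

def importance_weights(priorities: np.ndarray, alpha: float, beta: float) -> np.ndarray:
    probs = priorities ** alpha / np.sum(priorities ** alpha)  # sampling distribution P(i)
    weights = (len(priorities) * probs) ** -beta               # raw importance-sampling weights
    return weights / weights.max()                             # normalize so the largest weight is 1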
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    if True:
        exploration_noise = 0.5
        # exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            # 'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1 - top5/100) * math.log(total_macs)
            # 'reward_fn': lambda top1, total_macs: -1 * (1 - top1/100) * math.log(total_macs)
            # 'reward_fn': lambda top1, total_macs: -1 * max(1 - top1/100, 0.25) * math.log(total_macs)
            # 'reward_fn': lambda top1, total_macs: -1 * (1 - top1/100) * math.log(total_macs/100000)
            # 'reward_fn': lambda top1, total_macs: top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        # exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            # 'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }
    # msglogger.debug('Experiment configuration:\n' +
    #                 json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))

    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
        [(ConstantSchedule(exploration_noise), EnvironmentSteps(100 * steps_per_episode)),
         (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300 * steps_per_episode))])

    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))

    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
            # 'action_range': (0.20, 0.95),
            'action_range': (0.20, 0.80),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1 - top1/100) * math.log(total_macs),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
            # 'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                'app_args': app_args,
                                                                'amc_cfg': amc_cfg,
                                                                'services': services}
    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100 * steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300 * steps_per_episode))])

    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
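# The resulting exploration profile: constant noise while the agent heats up for
# 100 episodes, then exponential decay toward zero. A standalone sanity check of
# the intended shape, assuming ExponentialSchedule(initial, final, decay)
# multiplies by `decay` once per step (consistent with the schedule tests
# elsewhere in this section); steps_per_episode=13 is illustrative only (the
# function above uses conv_cnt).
def noise_at(step: int, noise: float = 0.5, decay: float = 0.996,
             steps_per_episode: int = 13) -> float:
    heatup_steps = 100 * steps_per_episode
    if step < heatup_steps:
        return noise                               # ConstantSchedule phase
    return noise * decay ** (step - heatup_steps)  # ExponentialSchedule phase

# e.g. noise_at(0) == 0.5, while noise_at(2600) has already decayed to ~0.0027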
def __init__(self):
    super().__init__()
    self.num_episodes_in_experience_replay = 1000000
    self.policy_gradient_rescaler = PolicyGradientRescaler.GAE
    self.gae_lambda = 0.95
    self.use_kl_regularization = False
    self.clip_likelihood_ratio_using_epsilon = 0.2
    self.estimate_state_value_using_gae = True
    self.beta_entropy = 0.01  # should be 0 for mujoco
    self.num_consecutive_playing_steps = EnvironmentSteps(2048)
    self.optimization_epochs = 10
    self.normalization_stats = None
    self.clipping_decay_schedule = ConstantSchedule(1)
    self.act_for_full_episodes = True
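# How the two knobs above interact in the clipped-surrogate loss: the likelihood
# ratio is clipped to 1 +/- eps, where eps is (by assumption here) the 0.2
# epsilon scaled by the decay schedule's current value, so ConstantSchedule(1)
# keeps the clip range fixed. A per-sample sketch of the standard PPO objective,
# not the rl_coach loss code.
import numpy as np

def clipped_surrogate(ratio: np.ndarray, advantage: np.ndarray,
                      epsilon: float = 0.2, clip_decay: float = 1.0) -> np.ndarray:
    eps = epsilon * clip_decay  # decayed clip range
    return np.minimum(ratio * advantage,
                      np.clip(ratio, 1 - eps, 1 + eps) * advantage)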
def test_piece_wise_schedule():
    # decreasing schedule
    schedule = PieceWiseSchedule(
        [
            (LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
            (ConstantSchedule(4), EnvironmentSteps(10)),
            (ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
        ]
    )

    target_values = np.append(np.linspace(1, 2, 6), np.ones(11) * 4)
    for i in range(16):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    current_power = 1
    for i in range(10):
        assert round(schedule.current_value, 4) == round(3 * current_power, 4)
        current_power *= 0.99
        schedule.step()
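# The behavior the test pins down, in functional form: a piecewise schedule
# concatenates sub-schedules, each active for a fixed number of steps, and clamps
# to the last piece afterwards. A simplified standalone sketch (rl_coach's exact
# boundary bookkeeping differs slightly):
def piecewise_value(step: int, pieces) -> float:
    """pieces: list of (value_fn, duration); value_fn maps a piece-local step to a value."""
    for value_fn, duration in pieces:
        if step < duration:
            return value_fn(step)
        step -= duration
    return value_fn(duration)  # past the end: clamp to the final piece

pieces = [
    (lambda t: 1 + (3 - 1) * t / 10, 5),  # LinearSchedule(1, 3, 10), cut off after 5 steps
    (lambda t: 4.0, 10),                  # ConstantSchedule(4)
    (lambda t: 3 * 0.99 ** t, 10),        # ExponentialSchedule(3, 1, 0.99)
]
assert piecewise_value(0, pieces) == 1.0 and piecewise_value(7, pieces) == 4.0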
def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s", args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will hand over to DistillerWrapperEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol': args.amc_protocol,
        'agent_algo': args.amc_agent_algo,
        'perform_thinning': perform_thinning,
        'num_ft_epochs': num_ft_epochs,
        'action_range': action_range,
        'conv_cnt': conv_cnt,
        'reward_frequency': args.amc_reward_frequency})

    # net_wrapper = NetworkWrapper(model, app_args, services)
    # return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: -(1 - top1 / 100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1 / 100  # (90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not currently supported".format(args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if args.amc_agent_algo == "DDPG":
            from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params
            agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
                [(ConstantSchedule(amc_cfg.heatup_noise), EnvironmentSteps(heatup_duration)),
                 (ExponentialSchedule(amc_cfg.initial_training_noise, 0, amc_cfg.training_noise_decay),
                  EnvironmentSteps(training_noise_duration))])
            # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
        elif "ClippedPPO" in args.amc_agent_algo:
            from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params

        # These parameters are passed to the Distiller environment
        graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                    'app_args': app_args,
                                                                    'amc_cfg': amc_cfg,
                                                                    'services': services}
        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense([256])]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1
agent_params.memory.goals_space = GoalsSpace(
    goal_name='state',
    reward_type=ReachingGoal(distance_from_goal_threshold=0,
                             goal_reaching_reward=0,
                             default_reward=-1),
    distance_metric=GoalsSpace.DistanceMetric.Euclidean)

###############
# Environment #
###############
# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range, which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))
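# What the "Future" goal-selection strategy configured above does, in miniature:
# for each stored transition, sample up to k goals actually achieved later in the
# same episode and relabel the transition's reward against them (Andrychowicz et
# al., 2017). A standalone sketch, not the rl_coach implementation; `episode` is
# a hypothetical list of (state, achieved_goal) pairs, and k=4 mirrors
# hindsight_transitions_per_regular_transition above.
import random

def hindsight_goals(episode, t, k=4):
    future_goals = [goal for _, goal in episode[t + 1:]]  # goals reached after step t
    return random.sample(future_goals, min(k, len(future_goals)))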
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('CameraRGB', 'cropping',
                                                 ObservationCropFilter(crop_low=np.array([115, 0, 0]),
                                                                       crop_high=np.array([510, -1, -1])))
agent_params.input_filter.add_observation_filter('CameraRGB', 'rescale',
                                                 ObservationRescaleToSizeFilter(
                                                     ImageObservationSpace(np.array([88, 200, 3]), high=255)))
agent_params.input_filter.add_observation_filter('CameraRGB', 'to_uint8', ObservationToUInt8Filter(0, 255))
agent_params.input_filter.add_observation_filter(
    'measurements', 'select_speed',
    ObservationReductionBySubPartsNameFilter(
        ["forward_speed"],
        reduction_method=ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

# no exploration is used
agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
agent_params.exploration.evaluation_noise_percentage = 0

# no playing during the training phase
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)

# use the following command line to download and extract the CARLA dataset:
# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
agent_params.memory.state_key_with_the_class_index = 'high_level_command'
agent_params.memory.num_classes = 4

# download the dataset if it doesn't exist
if not os.path.exists(agent_params.memory.load_memory_from_file_path):
    screen.log_title("The CARLA dataset is not present in the following path: {}"
                     .format(agent_params.memory.load_memory_from_file_path))
schedule_params.evaluation_steps = EnvironmentSteps(135000)
schedule_params.heatup_steps = EnvironmentSteps(50000)

#########
# Agent #
#########
agent_params = DDQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].clip_gradients = 10
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(40000)
agent_params.exploration.epsilon_schedule = PieceWiseSchedule(
    [(LinearSchedule(1, 0.1, 1000000), EnvironmentSteps(1000000)),
     (LinearSchedule(0.1, 0.01, 10000000), EnvironmentSteps(1000000)),
     (ConstantSchedule(0.001), EnvironmentSteps(10000000))]
)
agent_params.memory = PrioritizedExperienceReplayParameters()
agent_params.memory.beta = LinearSchedule(0.4, 1, 12500000)  # 12.5M training iterations = 50M steps = 200M frames

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

########
    'action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
# agent_params.network_wrappers['critic'].clip_gradients = 100
# agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau, pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#            Gym             #
##############################
env_params = GymEnvironmentParameters()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'
agent_params = SILAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 5
agent_params.algorithm.beta_entropy = 0.01
agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0007
agent_params.network_wrappers['main'].batch_size = 32
agent_params.network_wrappers['main'].async_training = False
# scaling down the gradients + each agent training with a batch of 32 = one agent training with a batch of 512
agent_params.network_wrappers['main'].scale_down_gradients_by_number_of_workers_for_sync_training = True
agent_params.memory.shared_memory = True  # according to the SIL code, only a single replay buffer is used
agent_params.memory.max_size = (MemoryGranularity.Transitions, 100000)
# called bias correction in the paper - this value is only used for hard exploration problems
agent_params.memory.beta = ConstantSchedule(0.1)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False
def __init__(self):
    super().__init__()
    self.max_size = (MemoryGranularity.Transitions, 1000000)
    self.alpha = 0.6
    self.beta = ConstantSchedule(0.4)
    self.epsilon = 1e-6
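# A typical way presets override these defaults (see the DDQN Atari preset above,
# and assuming this is the __init__ of PrioritizedExperienceReplayParameters):
# keep alpha at 0.6 but anneal beta toward 1 over training, since a constant
# beta of 0.4 never fully corrects the sampling bias.
params = PrioritizedExperienceReplayParameters()
params.beta = LinearSchedule(0.4, 1, 12500000)  # anneal the importance-sampling correction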
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0

agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise = 0.05

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############
env_params = StarCraft2EnvironmentParameters(level='CollectMineralShards')
env_params.feature_screen_maps_to_use = [5]
env_params.feature_minimap_maps_to_use = [5]