Example #1
    def solve(self, model, app_args, amc_cfg, services, steps_per_episode):
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if amc_cfg.agent_algo == "DDPG":
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_DDPG import (
                graph_manager, agent_params)
            graph_manager.agent_params.exploration.noise_schedule = ExponentialSchedule(
                amc_cfg.ddpg_cfg.initial_training_noise, 0,
                amc_cfg.ddpg_cfg.training_noise_decay)
            # Number of iterations to train
            graph_manager.agent_params.algorithm.num_consecutive_training_steps = steps_per_episode
            #graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(1)
            graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(
                steps_per_episode)
            # Heatup
            graph_manager.heatup_steps = EnvironmentEpisodes(
                amc_cfg.ddpg_cfg.num_heatup_episodes)
            # Replay buffer size
            graph_manager.agent_params.memory.max_size = (
                MemoryGranularity.Transitions,
                amc_cfg.ddpg_cfg.replay_buffer_size)
            # Convert the per-episode noise-decay rate into an equivalent per-step rate
            amc_cfg.ddpg_cfg.training_noise_decay = amc_cfg.ddpg_cfg.training_noise_decay**(
                1. / steps_per_episode)
        elif "ClippedPPO" in amc_cfg.agent_algo:
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_ClippedPPO import graph_manager, agent_params
        elif "TD3" in amc_cfg.agent_algo:
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_TD3 import graph_manager, agent_params
        else:
            raise ValueError(
                "The agent algorithm you are trying to use (%s) is not supported"
                % amc_cfg.agent_algo)

        # Number of training episodes
        n_training_episodes = amc_cfg.ddpg_cfg.num_training_episodes
        graph_manager.improve_steps = EnvironmentEpisodes(n_training_episodes)
        # Don't evaluate until the end
        graph_manager.steps_between_evaluation_periods = EnvironmentEpisodes(
            n_training_episodes)

        # These parameters are passed to the Distiller environment
        env_cfg = {
            'model': model,
            'app_args': app_args,
            'amc_cfg': amc_cfg,
            'services': services
        }
        graph_manager.env_params.additional_simulator_parameters = env_cfg

        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        # Set Coach's PRNG seed
        if app_args.seed is not None:
            task_parameters.seed = app_args.seed
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
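A minimal sketch of how this solve() method might be driven, under assumed values: the ddpg_cfg field names follow what solve() reads above, and the numbers echo the 0.5/0.996 noise settings and 100/300 episode budgets seen in the later examples; nothing here is prescribed by the snippet itself.

import distiller

# Hypothetical configuration; every value below is an assumption for illustration.
ddpg_cfg = distiller.utils.MutableNamedTuple({
    'initial_training_noise': 0.5,
    'training_noise_decay': 0.996,
    'num_heatup_episodes': 100,
    'num_training_episodes': 300,
    'replay_buffer_size': 2000})
amc_cfg = distiller.utils.MutableNamedTuple({
    'agent_algo': 'DDPG',
    'ddpg_cfg': ddpg_cfg})
# solver.solve(model, app_args, amc_cfg, services, steps_per_episode=13)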
Example #2
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    if True:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            #'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top5/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
            #'reward_fn': lambda top1, total_macs:  top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }

    #msglogger.debug('Experiment configuration:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
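The active reward above trades accuracy against compute: reward = -(1 - top5/100) * log(total_macs). A quick check with made-up numbers (illustrative only, not from any real run) shows the shape of the trade-off:

import math

reward_fn = lambda top1, top5, vloss, total_macs: -1 * (1 - top5/100) * math.log(total_macs)
print(reward_fn(70.0, 90.0, 0.5, 1e8))  # ~ -1.84: 10% top-5 error times ln(1e8)
print(reward_fn(70.0, 95.0, 0.5, 5e7))  # ~ -0.89: fewer errors and fewer MACs score higher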
Example #3
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))
    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
                'validate_fn': validate_fn,
                'save_checkpoint_fn': save_checkpoint_fn,
                'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
                'dataset': dataset,
                'arch': arch,
                'optimizer_data': optimizer_data})
    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
                #'action_range': (0.20, 0.95),
                'action_range': (0.20, 0.80),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': None,
                'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top1/100) * math.log(total_macs),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
                'action_range': (0.10, 0.95),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': 1.5e8,
                'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
                #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                'app_args': app_args,
                                                                'amc_cfg': amc_cfg,
                                                                'services': services}
    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
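A note on distiller.utils.MutableNamedTuple, used throughout these examples: it behaves like a dict whose keys are also readable and writable as attributes (Example #6 below assigns amc_cfg.target_density after construction). A rough stand-in with the same observable behavior, offered as an assumption rather than Distiller's actual implementation:

class MutableNamedTuple(dict):
    """Hypothetical stand-in: a dict that exposes its keys as mutable attributes."""
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)
    def __setattr__(self, name, value):
        self[name] = value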
Example #4
def test_exponential_schedule():
    # decreasing schedule
    schedule = ExponentialSchedule(10, 3, 0.99)

    current_power = 1
    for i in range(100):
        assert round(schedule.current_value,6) == round(10*current_power,6)
        current_power *= 0.99
        schedule.step()

    for i in range(100):
        schedule.step()
    assert schedule.current_value == 3
Example #5
def test_piece_wise_schedule():
    # decreasing schedule
    schedule = PieceWiseSchedule(
        [(LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
         (ConstantSchedule(4), EnvironmentSteps(10)),
         (ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
         ]
    )

    target_values = np.append(np.linspace(1, 2, 6), np.ones(11)*4)
    for i in range(16):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    current_power = 1
    for i in range(10):
        assert round(schedule.current_value, 4) == round(3*current_power, 4)
        current_power *= 0.99
        schedule.step()
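Taken together, the two tests pin down the schedule semantics the presets above rely on: ExponentialSchedule(initial, final, decay) multiplies the value by decay on every step and saturates at final, while PieceWiseSchedule runs each sub-schedule for its step budget before moving to the next. A sketch of the exponential rule exactly as the first test asserts it (assumed behavior, not Coach's actual implementation):

def exponential_value(initial, final, decay, t):
    # Value after t steps: initial * decay**t, saturating at final.
    value = initial * decay ** t
    return max(value, final) if initial > final else min(value, final)

assert round(exponential_value(10, 3, 0.99, 5), 6) == round(10 * 0.99**5, 6)
assert exponential_value(10, 3, 0.99, 200) == 3  # decayed past the floor, so clamped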
Example #6
def do_adc_internal(model, args, optimizer_data, validate_fn,
                    save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s",
                   args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will hand over to DistillerWrapperEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn
    })

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data
    })

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol': args.amc_protocol,
        'agent_algo': args.amc_agent_algo,
        'perform_thinning': perform_thinning,
        'num_ft_epochs': num_ft_epochs,
        'action_range': action_range,
        'conv_cnt': conv_cnt,
        'reward_frequency': args.amc_reward_frequency
    })

    #net_wrapper = NetworkWrapper(model, app_args, services)
    #return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: \
            -(1 - top1 / 100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1 / 100  #(90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not supported currently".format(
            args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(
            DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if args.amc_agent_algo == "DDPG":
            from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params
            agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
                (ConstantSchedule(amc_cfg.heatup_noise), EnvironmentSteps(heatup_duration)),
                (ExponentialSchedule(amc_cfg.initial_training_noise, 0, amc_cfg.training_noise_decay),
                 EnvironmentSteps(training_noise_duration))])
            # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
        elif "ClippedPPO" in args.amc_agent_algo:
            from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params

        # These parameters are passed to the Distiller environment
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'app_args': app_args,
            'amc_cfg': amc_cfg,
            'services': services
        }

        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
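For reference, a sketch of the argument surface do_adc_internal() reads, using an argparse-style namespace; every value is illustrative (the action range and the 100/300 epoch budgets mirror other examples on this page, the rest are hypothetical):

from types import SimpleNamespace

args = SimpleNamespace(
    dataset='imagenet', arch='resnet50',  # assumed example values
    amc_protocol='mac-constrained',
    amc_agent_algo='DDPG',
    amc_ft_epochs=1,                      # hypothetical
    amc_action_range=(0.20, 0.80),        # as in Example #3
    amc_target_density=0.5,               # hypothetical
    amc_reward_frequency=None,
    amc_heatup_epochs=100,
    amc_training_epochs=300)
# do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn, train_fn)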
Beispiel #7
0
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
#agent_params.network_wrappers['critic'].clip_gradients = 100
#agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))
])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#      Gym                   #
##############################
env_params = GymEnvironmentParameters()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
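The preset presumably ends by assembling these parameter objects into the graph_manager that the other examples import; a minimal sketch of that step, assuming Coach's BasicRLGraphManager and ScheduleParameters (the schedule values are placeholders, since the callers above override heatup_steps, improve_steps and steps_between_evaluation_periods anyway):

from rl_coach.core_types import EnvironmentEpisodes, TrainingSteps
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(400)                            # placeholder
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(400)   # placeholder
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(100)                       # placeholder

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)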