Example no. 1
0
def get_network_and_environment_creator(args, random_seed=3):
    """Create the environment factory and the matching policy network.

    Mutates ``args`` in place: stores the emulator's action count and the
    chosen random seed so downstream components see consistent settings.

    Returns a ``(network, env_creator)`` pair.
    """
    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.random_seed = random_seed

    network_conf = {
        'name': "local_learning",
        'num_actions': env_creator.num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'emulator_counts': args.emulator_counts,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type,
        'initial_lr': args.initial_lr,
        'e': args.e,
        'alpha': args.alpha,
        'T': args.T,
        'latent_shape': args.latent_shape,
        'ae_arch': args.ae_arch,
        'bonus_type': args.bonus_type,
        'num_heads': args.num_heads,
    }

    # Map architecture name to its network class; anything unrecognised
    # falls back to the Nature-paper CNN, as the original if/elif chain did.
    arch_to_class = {
        'NIPS': NIPSPolicyVNetwork,
        'SURP': ModelBasedPolicyVNetwork,
    }
    network_class = arch_to_class.get(args.arch, NaturePolicyVNetwork)
    return network_class(network_conf), env_creator
Example no. 2
0
def get_network_and_environment_creator(args, random_seed=3):
    """Create the environment factory and a network-factory closure.

    Mutates ``args`` in place: stores the emulator's action count and the
    chosen random seed so downstream components see consistent settings.

    Returns a ``(network_creator, env_creator)`` pair; ``network_creator``
    builds a fresh network instance under the given TF variable-scope name.
    """
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type,
    }
    network = NIPSPolicyVNetwork if args.arch == 'NIPS' else NaturePolicyVNetwork

    def network_creator(name='local_learning'):
        # The closure only *reads* network_conf, so the original `nonlocal`
        # declaration was unnecessary.  dict(base, name=...) makes the
        # per-call shallow copy and sets the name in one step.
        return network(dict(network_conf, name=name))

    return network_creator, env_creator
Example no. 3
0
def get_network_and_environment_creator(args, random_seed=3):
    """Create the environment factory and a network-factory closure.

    Mutates ``args`` in place: stores the emulator's action count and the
    chosen random seed so downstream components see consistent settings.

    Returns a ``(network_creator, env_creator)`` pair; ``network_creator``
    builds a fresh network instance under the given variable-scope name.
    """
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {'num_actions': num_actions,
                    'entropy_regularisation_strength': args.entropy_regularisation_strength,
                    'device': args.device,
                    'clip_norm': args.clip_norm,
                    'clip_norm_type': args.clip_norm_type}
    network = NIPSPolicyVNetwork if args.arch == 'NIPS' else NaturePolicyVNetwork

    def network_creator(name='local_learning'):
        # The closure only *reads* network_conf, so no `nonlocal` is needed.
        # dict(base, **extras) makes the per-call shallow copy and merges the
        # architecture flags in one expression; args is re-read on each call,
        # matching the original behavior.
        return network(dict(
            network_conf,
            name=name,
            event=args.event,
            event_type=args.event_type,
            per_channel=args.per_channel,
            batch_norm=args.batch_norm,
            norm=args.norm,
            convnet=args.convnet,
            is_noisy=args.is_noisy,
            spatial_pooling=args.spatial_pooling,
            seperate_stream=args.seperate_stream,  # sic: key spelled as consumers expect
        ))

    return network_creator, env_creator
Example no. 4
0
def main(args):
    """Entry point: load/save config, start simulators, and run training.

    Side effects: mutates ``args`` in place, writes ``args.json`` to the
    debugging folder, may set ``CUDA_VISIBLE_DEVICES``, spawns simulator
    processes, and blocks until training finishes.
    """
    if args.resume:
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        assert os.path.exists(
            conf_file
        ), "Could not find an args.json file in the debugging folder"
        # Restore every previously saved argument onto the live namespace.
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)
    if 'gpu' in args.device:
        # Pin this process to the GPU with the most free memory.
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    args.random_seed = 3  # random_seed
    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    import numpy as np
    # Shared buffers (one row per emulator) used to exchange states, actions,
    # rewards and termination flags between the agent and the emulators.
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators, ) + args.state_shape, dtype=np.float32),
        "a": np.zeros((n_emulators, ), dtype=np.int32),  # Actions
        "r": np.zeros((n_emulators, ), dtype=np.float32),  # Rewards
        # np.bool was removed in NumPy 1.24; np.bool_ is the supported dtype.
        "done": np.zeros((n_emulators, ), dtype=np.bool_)
    }  # Dones
    sim_coordinator = SimulatorsCoordinator(
        env_creator, args.n_emulators_per_emulator_runner,
        args.n_emulator_runners, variables)
    # Start all simulator processes
    sim_coordinator.start()

    network = QNetwork

    def network_creator(name='value_learning', learning_network=None):
        # `args` is mutated (name attached), so the nonlocal rebinding intent
        # is kept even though attribute assignment alone would not need it.
        nonlocal args
        args.name = name
        return network(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)

    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
Example no. 5
0
def get_network_and_environment_creator(args, explo_policy, random_seed=3):
    """Create the environment factory and a network-factory closure.

    Mutates ``args`` in place: stores the emulator's action count and the
    chosen random seed.  ``explo_policy`` supplies exploration settings
    (softmax temperature, keep percentage) folded into the network config.

    Returns a ``(network_creator, env_creator)`` pair.
    """
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type,
        'softmax_temp': explo_policy.softmax_temp,
        'keep_percentage': explo_policy.keep_percentage,
        'rgb': args.rgb,
        'activation': args.activation,
        'alpha_leaky_relu': args.alpha_leaky_relu,
        'max_repetition': args.max_repetition,
        'nb_choices': args.nb_choices,
    }

    # Map architecture name to its network class; anything unrecognised
    # falls back to the Nature-paper CNN, as the original if/elif chain did.
    arch_to_class = {
        'PWYX': PpwwyyxxPolicyVNetwork,
        'LSTM': LSTMPolicyNetwork,
        'BAYESIAN': BayesianPolicyVNetwork,
        'NIPS': NIPSPolicyVNetwork,
    }
    network = arch_to_class.get(args.arch, NaturePolicyVNetwork)

    def network_creator(name='local_learning'):
        # The closure only *reads* network_conf, so the original `nonlocal`
        # declaration was unnecessary; dict(base, name=...) makes the
        # per-call shallow copy and sets the name in one step.
        return network(dict(network_conf, name=name))

    return network_creator, env_creator
Example no. 6
0
    def __init__(self, args):
        """Set up a poisoning-evaluation run: environments, network, buffers.

        Mutates ``args`` in place (records the emulator's action count),
        restores a checkpoint path, creates one environment per test episode,
        and optionally opens a video writer / gif-frame callbacks.
        """
        env_creator = environment_creator.EnvironmentCreator(args)
        self.num_actions = env_creator.num_actions
        args.num_actions = self.num_actions

        self.folder = args.folder
        self.checkpoint = os.path.join(args.folder, 'checkpoints',
                                       'checkpoint-' + str(args.index))
        self.noops = args.noops
        self.poison = args.poison
        self.pixels_to_poison = args.pixels_to_poison
        self.color = args.color
        self.action = args.action
        self.test_count = args.test_count
        self.store = args.store
        self.store_name = args.store_name
        self.state_index = [0 for _ in range(args.test_count)]
        self.poison_randomly = args.poison_randomly
        self.poison_some = args.poison_some
        # Each episode starts poisoning at a random step after the no-op phase.
        self.start_at = [
            self.noops + random.randint(0, 200) for _ in range(args.test_count)
        ]
        self.end_at = [self.start_at[i] for i in range(args.test_count)]
        if self.poison_some:
            # Poison for a fixed window of `poison_some` steps per episode.
            self.end_at = [
                self.start_at[i] + self.poison_some
                for i in range(args.test_count)
            ]
        self.global_steps = [self.poison_some] * args.test_count

        print(self.start_at, self.end_at)
        # configuration
        network_conf = {
            'num_actions': self.num_actions,
            'device': '/gpu:0',
            # these don't matter
            'clip_norm': 3.0,
            'entropy_regularisation_strength': 0.02,
            'clip_norm_type': 'global',
            'name': 'local_learning'
        }

        # create network
        if args.arch == 'NIPS':
            self.network = NIPSPolicyVNetwork(network_conf)
        else:
            self.network = NaturePolicyVNetwork(network_conf)

        # One environment per test episode, evaluated in lockstep.
        self.environments = [
            env_creator.create_environment(i) for i in range(args.test_count)
        ]
        # 84x84x4 stacked-frame agent observations; 210x160x3 raw RGB frames.
        self.states = np.zeros([args.test_count, 84, 84, 4])
        self.rgb_states = np.zeros([args.test_count, 210, 160, 3])
        self.action_distribution = np.zeros(env_creator.num_actions)
        # np.bool was removed in NumPy 1.24; np.bool_ is the supported dtype.
        self.episodes_over = np.zeros(args.test_count, dtype=np.bool_)
        self.rewards = np.zeros(args.test_count, dtype=np.float32)
        self.start_time = [time.time() for _ in range(args.test_count)]

        self.total_poisoning = np.zeros(args.test_count)
        self.target_action = np.zeros(args.test_count)
        self.current_lives = [env.lives for env in self.environments]
        self.condition_of_poisoning = [True for _ in range(args.test_count)]
        self.set_start = [True for _ in range(args.test_count)]
        self.elapsed_time = np.zeros(args.test_count)

        self.window = args.window
        self.gif_name = args.gif_name
        self.video_name = args.video_name
        self.state_id = 0

        if args.video_name:
            folder = os.path.join(args.folder, args.media_folder)
            if not os.path.exists(folder):
                os.makedirs(folder)
            height = 210
            width = 160
            pathname = os.path.join(folder, args.video_name + str(0))
            # NOTE(review): MJPG fourcc with a .mp4 container is an unusual
            # pairing; OpenCV may fall back or fail silently — confirm codec.
            fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
            fps = 20
            video_filename = pathname + '.mp4'
            self.out = cv2.VideoWriter(video_filename, fourcc, fps,
                                       (width, height))

        if args.gif_name:
            # Hook each environment so every rendered frame is captured.
            for i, environment in enumerate(self.environments):
                environment.on_new_frame = self.get_save_frame(
                    os.path.join(args.folder, args.media_folder),
                    args.gif_name, i)