def get_network_and_environment_creator(args, random_seed=3):
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'name': "local_learning",
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'emulator_counts': args.emulator_counts,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type,
        'initial_lr': args.initial_lr,
        'e': args.e,
        'alpha': args.alpha,
        'T': args.T,
        'latent_shape': args.latent_shape,
        'ae_arch': args.ae_arch,
        'bonus_type': args.bonus_type,
        'num_heads': args.num_heads
    }

    # Select the policy/value network architecture by name
    if args.arch == 'NIPS':
        network = NIPSPolicyVNetwork(network_conf)
    elif args.arch == 'SURP':
        network = ModelBasedPolicyVNetwork(network_conf)
    else:
        network = NaturePolicyVNetwork(network_conf)
    return network, env_creator
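# A minimal usage sketch for the variant above, which instantiates the
# network directly. The Namespace fields and values below are assumptions
# standing in for the repository's real argparse arguments.
from argparse import Namespace

example_args = Namespace(
    game='pong', arch='NIPS', device='/cpu:0', emulator_counts=32,
    entropy_regularisation_strength=0.02, clip_norm=3.0,
    clip_norm_type='global', initial_lr=0.0224, e=0.1, alpha=0.99, T=5,
    latent_shape=32, ae_arch='FC', bonus_type='count', num_heads=1)

network, env_creator = get_network_and_environment_creator(example_args)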
def get_network_and_environment_creator(args, random_seed=3):
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type
    }

    if args.arch == 'NIPS':
        network = NIPSPolicyVNetwork
    else:
        network = NaturePolicyVNetwork

    def network_creator(name='local_learning'):
        nonlocal network_conf
        # Copy the shared config so each network can carry its own name
        copied_network_conf = copy.copy(network_conf)
        copied_network_conf['name'] = name
        return network(copied_network_conf)

    return network_creator, env_creator
def get_network_and_environment_creator(args, random_seed=3):
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type
    }

    if args.arch == 'NIPS':
        network = NIPSPolicyVNetwork
    else:
        network = NaturePolicyVNetwork

    def network_creator(name='local_learning'):
        nonlocal network_conf
        copied_network_conf = copy.copy(network_conf)
        copied_network_conf['name'] = name
        copied_network_conf['event'] = args.event
        copied_network_conf['event_type'] = args.event_type
        copied_network_conf['per_channel'] = args.per_channel
        copied_network_conf['batch_norm'] = args.batch_norm
        copied_network_conf['norm'] = args.norm
        copied_network_conf['convnet'] = args.convnet
        copied_network_conf['is_noisy'] = args.is_noisy
        copied_network_conf['spatial_pooling'] = args.spatial_pooling
        copied_network_conf['seperate_stream'] = args.seperate_stream
        return network(copied_network_conf)

    return network_creator, env_creator
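# A minimal sketch of how the two closure variants above are consumed,
# assuming args is an argparse namespace like the one sketched earlier.
# Because network_creator copies network_conf on every call, each network
# gets its own 'name' (and hence its own variable scope) without mutating
# the shared config dict.
network_creator, env_creator = get_network_and_environment_creator(args)
learning_network = network_creator(name='local_learning')
target_network = network_creator(name='target')  # hypothetical second copy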
def main(args):
    if args.resume:
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        assert os.path.exists(conf_file), \
            "Could not find an args.json file in the debugging folder"
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)

    if 'gpu' in args.device:
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    args.random_seed = 3  # fixed random seed
    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    import numpy as np

    # Create a set of arrays (one slot per emulator) to exchange states,
    # actions, rewards, and dones between the agent and the emulators.
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators,) + args.state_shape, dtype=np.float32),
        "a": np.zeros((n_emulators,), dtype=np.int32),    # Actions
        "r": np.zeros((n_emulators,), dtype=np.float32),  # Rewards
        "done": np.zeros((n_emulators,), dtype=bool)      # Dones
    }

    sim_coordinator = SimulatorsCoordinator(
        env_creator, args.n_emulators_per_emulator_runner,
        args.n_emulator_runners, variables)
    # Start all simulator processes
    sim_coordinator.start()

    network = QNetwork

    def network_creator(name='value_learning', learning_network=None):
        nonlocal args
        args.name = name
        return network(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)
    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
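# A self-contained sketch of the shared-variables layout built in main()
# above: one slot per emulator for state, action, reward, and done. The
# 84x84x4 state shape and emulator count are assumptions; the real values
# come from env_creator.state_shape and the runner arguments.
import numpy as np

n_emulators = 8
state_shape = (84, 84, 4)
variables = {
    "s": np.zeros((n_emulators,) + state_shape, dtype=np.float32),
    "a": np.zeros((n_emulators,), dtype=np.int32),
    "r": np.zeros((n_emulators,), dtype=np.float32),
    "done": np.zeros((n_emulators,), dtype=bool),
}
# An emulator runner writes into its own slot; the learner reads the batch:
variables["r"][3] = 1.0
variables["done"][3] = True
batch_rewards = variables["r"].copy()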
def get_network_and_environment_creator(args, explo_policy, random_seed=3):
    env_creator = environment_creator.EnvironmentCreator(args)
    num_actions = env_creator.num_actions
    args.num_actions = num_actions
    args.random_seed = random_seed

    network_conf = {
        'num_actions': num_actions,
        'entropy_regularisation_strength': args.entropy_regularisation_strength,
        'device': args.device,
        'clip_norm': args.clip_norm,
        'clip_norm_type': args.clip_norm_type,
        'softmax_temp': explo_policy.softmax_temp,
        'keep_percentage': explo_policy.keep_percentage,
        'rgb': args.rgb,
        'activation': args.activation,
        'alpha_leaky_relu': args.alpha_leaky_relu,
        'max_repetition': args.max_repetition,
        'nb_choices': args.nb_choices
    }

    if args.arch == 'PWYX':
        network = PpwwyyxxPolicyVNetwork
    elif args.arch == 'LSTM':
        network = LSTMPolicyNetwork
    elif args.arch == 'BAYESIAN':
        network = BayesianPolicyVNetwork
    elif args.arch == 'NIPS':
        network = NIPSPolicyVNetwork
    else:
        network = NaturePolicyVNetwork

    def network_creator(name='local_learning'):
        nonlocal network_conf
        copied_network_conf = copy.copy(network_conf)
        copied_network_conf['name'] = name
        return network(copied_network_conf)

    return network_creator, env_creator
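# Sketch: this variant also threads exploration-policy parameters into the
# network config. Any object exposing softmax_temp and keep_percentage will
# do; the Namespace stub below is a hypothetical stand-in, and args is the
# same kind of argparse namespace as in the earlier sketches.
from argparse import Namespace

explo_policy_stub = Namespace(softmax_temp=1.0, keep_percentage=0.9)
network_creator, env_creator = get_network_and_environment_creator(
    args, explo_policy_stub)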
def __init__(self, args):
    env_creator = environment_creator.EnvironmentCreator(args)
    self.num_actions = env_creator.num_actions
    args.num_actions = self.num_actions

    self.folder = args.folder
    self.checkpoint = os.path.join(args.folder, 'checkpoints',
                                   'checkpoint-' + str(args.index))
    self.noops = args.noops
    self.poison = args.poison
    self.pixels_to_poison = args.pixels_to_poison
    self.color = args.color
    self.action = args.action
    self.test_count = args.test_count
    self.store = args.store
    self.store_name = args.store_name
    self.state_index = [0 for _ in range(args.test_count)]
    self.poison_randomly = args.poison_randomly
    self.poison_some = args.poison_some

    # Pick a random step at which poisoning starts for each test episode
    self.start_at = [self.noops + random.randint(0, 200)
                     for _ in range(args.test_count)]
    self.end_at = [self.start_at[i] for i in range(args.test_count)]
    if self.poison_some:
        self.end_at = [self.start_at[i] + self.poison_some
                       for i in range(args.test_count)]
        self.global_steps = [self.poison_some] * args.test_count
    print(self.start_at, self.end_at)

    # network configuration
    network_conf = {
        'num_actions': self.num_actions,
        'device': '/gpu:0',
        # these don't matter at test time
        'clip_norm': 3.0,
        'entropy_regularisation_strength': 0.02,
        'clip_norm_type': 'global',
        'name': 'local_learning'
    }

    # create network
    if args.arch == 'NIPS':
        self.network = NIPSPolicyVNetwork(network_conf)
    else:
        self.network = NaturePolicyVNetwork(network_conf)

    self.environments = [env_creator.create_environment(i)
                         for i in range(args.test_count)]
    self.states = np.zeros([args.test_count, 84, 84, 4])
    self.rgb_states = np.zeros([args.test_count, 210, 160, 3])
    self.action_distribution = np.zeros(env_creator.num_actions)
    self.episodes_over = np.zeros(args.test_count, dtype=bool)
    self.rewards = np.zeros(args.test_count, dtype=np.float32)
    self.start_time = [time.time() for _ in range(args.test_count)]

    self.total_poisoning = np.zeros(args.test_count)
    self.target_action = np.zeros(args.test_count)
    self.current_lives = [env.lives for env in self.environments]
    self.condition_of_poisoning = [True for _ in range(args.test_count)]
    self.set_start = [True for _ in range(args.test_count)]
    self.elapsed_time = np.zeros(args.test_count)

    self.window = args.window
    self.gif_name = args.gif_name
    self.video_name = args.video_name
    self.state_id = 0

    if args.video_name:
        folder = os.path.join(args.folder, args.media_folder)
        if not os.path.exists(folder):
            os.makedirs(folder)
        height = 210
        width = 160
        pathname = os.path.join(folder, args.video_name + str(0))
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        fps = 20
        video_filename = pathname + '.mp4'
        self.out = cv2.VideoWriter(video_filename, fourcc, fps,
                                   (width, height))

    if args.gif_name:
        for i, environment in enumerate(self.environments):
            environment.on_new_frame = self.get_save_frame(
                os.path.join(args.folder, args.media_folder),
                args.gif_name, i)
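# A self-contained sketch of the poisoning-window bookkeeping set up above:
# each test episode i is poisoned from start_at[i] until end_at[i]. Treating
# the window as half-open is an assumption here, and the concrete values are
# illustrative.
import random

noops = 30
poison_some = 50
test_count = 4
start_at = [noops + random.randint(0, 200) for _ in range(test_count)]
end_at = [s + poison_some for s in start_at]

def poisoning_active(step, i):
    # True while episode i's step counter lies inside its poisoning window
    return start_at[i] <= step < end_at[i]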