Code example #1
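The function below relies on the following imports. The standard-library and third-party imports follow directly from the calls it makes; the project-specific symbols (make_env, PoolManager_v2, get_goals, AutoEncoderFeatureExtractor, ObservationBuffer, TrainerFeatExtAE, Evolution, Agent, Log, sample_goal, evaluate_agents, save_agent) are assumed to come from the project's own modules, whose import paths are not shown here.

import datetime
import gc
import json

import numpy as np
import torch
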
def main(args):
	assert args.population >= 10, 'Population size should not be less than 10. Exiting.'
	assert args.generations >= 1, 'Number of generations should not be less than 1. Exiting.'
	assert args.trials >= 1, 'Number of trials should not be less than 1. Exiting.'
	assert args.episodes >= 1, 'Number of episodes should not be less than 1. Exiting.'
	assert args.save_interval >= 0, 'Save interval should not be less than 0. Exiting.'
	assert args.num_workers >= 1, 'Number of workers should not be less than 1. Exiting.'
	# set seed
	np.random.seed(args.seed)
	torch.manual_seed(args.seed)
	# params
	visualise = not args.no_visualise
	hebb_plastic = not args.no_hebbian_plasticity
	dynamic_goal = not args.no_dynamic_goal
	m_ = 2 if args.episodes < 20 else 15
	if dynamic_goal: swap_range = int((args.episodes/2.) - m_), int((args.episodes/2.) + m_+1)
	else: swap_range = None
	# create environment 
	# and optionally pool of worker envs if args.num_workers > 1 (for multiprocessing)
	env_fn = make_env(args.env_config_path)
	env, env_conf = env_fn()
	if args.num_workers > 1: pool = PoolManager_v2(env_fn, args.num_workers)
	else: pool = None
	# get all goals (reward locations) in the environment.
	goals = get_goals(env_conf['graph_shape']['depth'])
	if not env_conf['image_dataset']['1D']: # observations are 2D images
		# set up the feature extractor model that serves all (evolved) agent controller networks.
		# it extracts features used as input to the evolved controllers.
		obs_dim = int(np.prod(env.observation_space.shape)) # 144. each observation is a 12 x 12 image
		layers = [obs_dim, 64, 16]
		fe_model = AutoEncoderFeatureExtractor(layers)
		latent_dim = controller_input_dim = fe_model.get_latent_features_dim()
		#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
		device = torch.device('cpu')
		fe_model.to(device)
		buffer_size = args.population * args.episodes * 10
		obs_buffer = ObservationBuffer(buffer_size, (obs_dim, ))
		args.feature_extractor = {'layers': layers, 'type': 'fc_autoencoder'}
		fe_trainer = TrainerFeatExtAE(fe_model, obs_buffer, 0.001, 20, 1.0, device)
	else:
		raise ValueError('CTgraph should be configured to produce image observations')
	# set up evolution (evolution of network controller/agent)
	args.agent['n_input_neurons'] = controller_input_dim
	args.agent['plasticity'] = hebb_plastic
	evo = Evolution(Agent, args.agent, args.population)
	# instantiate log
	unique_name = 'ctgraph'
	exp_name = 'train-s{0}-depth{1}'.format(args.seed, env_conf['graph_shape']['depth'])
	exp_name = '{0}-p{1}-g{2}{3}'.format(exp_name, args.population, args.generations, unique_name)
	log = Log('./log/'+exp_name)
	# logs
	log.info('General program Log')
	log.info('goal swap range: {0}'.format(swap_range))
	modeldir_path = log.get_modeldir_path()
	visdir_path = log.get_visdir_path()
	# save experiment config
	exp_config = {}
	exp_config['environment'] = env_conf
	exp_config['others'] = vars(args)
	with open('{0}/config.json'.format(log.get_logdir_path()), 'w') as f:
		json.dump(exp_config, f, indent=4)

	trials_goals = []
	# train model (evolve controllers)
	# optionally sgd optimise feature extractor if env observations are 2d
	for generation in np.arange(args.generations):
		start_time = datetime.datetime.now()
		log.info('generation {0}'.format(generation))
		# determine swap point(s) and goal(s) for current generation
		if dynamic_goal:
			swap_points = np.random.randint(low=swap_range[0], high=swap_range[1], size=args.trials)
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				next_goal = sample_goal(goals, prev_goal=goal)
				trials_goals.append((goal, next_goal))
				log.info('trial {0} goals: {1}'.format(i+1, (goal, next_goal)))
			log.info('swap points: {0}'.format(swap_points))
		else:
			swap_points = None
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				log.info('trial {0} goal: {1}.'.format(i+1, goal))
				trials_goals.append((goal,))
		# evaluate fitness - each agent fitness is its average reward across trials
		agents = evo.get_all_individuals()
		if args.num_workers > 1:
			# create a clone of feature extractor and pass to method below.
			# this is a trick to solve the issue of pytorch raising an error about serialising
			# a non-leaf tensor that requires_grad. we need to pass the feature extractor to 
			# worker processes and this error occurs after the first generation.
			if fe_model is not None:
				fe_model_ = type(fe_model)(fe_model.layers_dim)
				fe_model_.load_state_dict(fe_model.state_dict())
			else:
				fe_model_ = None
			pool.evaluate_agents(agents, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model_, device, obs_buffer)
			fe_model_ = None
			gc.collect()
		else:
			evaluate_agents(agents, env, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model, device, obs_buffer)
		# log summary, model and generate network visualisation
		if args.save_interval > 0 and generation % args.save_interval == 0:
			top_agents = evo.get_n_fittest_individuals(n = args.top_n)
			top_agents_reward = [agent.get_reward() for agent in top_agents]
			best_fitness = top_agents_reward[0]
			log.info('top {0} agents: {1}'.format(args.top_n, np.array(top_agents_reward)))
			# write generation summary to logs (and screen)
			log.summary(generation, best_fitness, evo.get_worst_fitness(), evo.get_fitness_mean(),\
				evo.get_fitness_std())
			# save the model of the best agent and a visualisation of its phenotype/network.
			save_agent(top_agents[0], modeldir_path + 'gen-{0}-best.npy'.format(generation))
			if fe_model is not None:
				# save feature extractor
				state_dict_ = fe_model.state_dict()
				torch.save(state_dict_, modeldir_path + 'gen-{0}-femodel.pt'.format(generation))
			if visualise:
				# save controller/agent visualisation
				top_agents[0].draw_network(visdir_path +'gen-{0}-best'.format(generation))
		if generation == args.generations - 1:
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))
			break
		else:
			evo.selection()
			evo.produce_next_generation()
			if fe_model is not None:
				fe_trainer.train(epochs=20)
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))

	if pool is not None:
		pool.close()
	log.info('---Training over.---')
	log.close()
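
Only the main() body is shown above. Below is a minimal, hypothetical sketch of how the args namespace could be assembled and the function invoked; the flag names, defaults, and the empty agent dict are assumptions inferred from the attributes main() reads, not the project's actual command-line interface.

# Hypothetical entry point: flag names and defaults are assumptions, not the project's CLI.
import argparse

if __name__ == '__main__':
	parser = argparse.ArgumentParser()
	parser.add_argument('--population', type=int, default=100)
	parser.add_argument('--generations', type=int, default=200)
	parser.add_argument('--trials', type=int, default=4)
	parser.add_argument('--episodes', type=int, default=40)
	parser.add_argument('--save-interval', type=int, default=10)
	parser.add_argument('--num-workers', type=int, default=1)
	parser.add_argument('--seed', type=int, default=1)
	parser.add_argument('--top-n', type=int, default=5)
	parser.add_argument('--env-config-path', type=str, default='./env_config.json')
	parser.add_argument('--no-visualise', action='store_true')
	parser.add_argument('--no-hebbian-plasticity', action='store_true')
	parser.add_argument('--no-dynamic-goal', action='store_true')
	args = parser.parse_args()
	# main() expects an agent configuration dict and fills in 'n_input_neurons'
	# and 'plasticity' itself; any remaining keys are project-specific.
	args.agent = {}
	main(args)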
Code example #2
File: train_mcaltenv.py Project: dlpbc/penn-a
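This variant relies on the same standard-library and third-party imports as example #1 (datetime, gc, json, numpy, torch). Among the project-specific symbols it uses ConvAutoEncoderFeatureExtractor in place of AutoEncoderFeatureExtractor, and it takes its goals directly from the environment via env.get_goals() rather than from a get_goals() helper.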
def main(args):
	assert args.population >= 10, 'Population size should not be less than 10. Exiting.'
	assert args.generations >= 1, 'Number of generations should not be less than 1. Exiting.'
	assert args.trials >= 1, 'Number of trials should not be less than 1. Exiting.'
	assert args.episodes >= 20, 'Number of episodes should not be less than 20. Exiting.'
	assert args.save_interval >= 0, 'Save interval should not be less than 0. Exiting.'
	assert args.num_workers >= 1, 'Number of workers should not be less than 1. Exiting.'
	# set seed
	np.random.seed(args.seed)
	torch.manual_seed(args.seed)
	# params
	visualise = not args.no_visualise
	hebb_plastic = not args.no_hebbian_plasticity
	dynamic_goal = not args.no_dynamic_goal
	num_trial_swaps = 2 # NOTE: number of goal swaps within each trial (multi-swap setting)
	args.num_trial_swaps = num_trial_swaps
	if dynamic_goal:
		segment_duration = int(args.episodes / (num_trial_swaps + 1))
		if segment_duration < 10: m_ = 2
		elif 10 <= segment_duration < 20: m_ = 5
		elif 20 <= segment_duration < 30: m_ = 10
		else: m_ = 15
		swap_range = []
		curr_seg = 0
		for _ in range(num_trial_swaps):
			curr_seg += segment_duration
			swap_range.append((curr_seg-m_, curr_seg+m_+1))
	else: swap_range = None
	# create environment 
	# and optionally pool of worker envs if args.num_workers > 1 (for multiprocessing)
	env, env_conf = make_env()
	# multiprocessing
	if args.num_workers > 1: pool = PoolManager_v2(make_env, args.num_workers)
	else: pool = None
	# environment goals
	goals = env.get_goals()
	# set up the feature extractor model that serves all (evolved) agent controller networks.
	# it extracts features used as input to the evolved controllers.
	obs_dim = env.observation_space.shape # for conv feature extractor
	fe_model = ConvAutoEncoderFeatureExtractor()
	latent_dim = controller_input_dim = fe_model.get_latent_features_dim()
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	#device = torch.device('cpu')
	fe_model.to(device)
	buffer_size = args.population * args.episodes
	obs_buffer = ObservationBuffer(buffer_size, obs_dim) # conv nn feature extractor
	args.feature_extractor = {'layers': 'N/A', 'type': 'conv_autoencoder'}
	fe_trainer = TrainerFeatExtAE(fe_model, obs_buffer, 0.0005, 20, 1.0, device)
	# set up evolution (evolution of network controller/agent)
	args.agent['n_input_neurons'] = controller_input_dim
	args.agent['plasticity'] = hebb_plastic
	evo = Evolution(Agent, args.agent, args.population)
	# instantiate log
	unique_name = 'mcaltenv'
	exp_name = 'train-s{0}'.format(args.seed)
	exp_name = '{0}-p{1}-g{2}{3}'.format(exp_name, args.population, args.generations, unique_name)
	log = Log('./log/'+exp_name)
	# logs
	log.info('General program Log')
	log.info('goal swap range: {0}'.format(swap_range))
	modeldir_path = log.get_modeldir_path()
	visdir_path = log.get_visdir_path()
	# save experiment config
	exp_config = {}
	exp_config['environment'] = env_conf
	exp_config['others'] = vars(args)
	with open('{0}/config.json'.format(log.get_logdir_path()), 'w') as f:
		json.dump(exp_config, f, indent=4)

	trials_goals = []
	# train model (evolve controllers)
	# rmsprop optimise feature extractor
	for generation in np.arange(args.generations):
		start_time = datetime.datetime.now()
		log.info('generation {0}'.format(generation))
		# determine swap point(s) and goal(s) for current generation
		if dynamic_goal:
			swap_points = []
			for r in swap_range:
				swap_points.append(np.random.randint(low=r[0], high=r[1], size=args.trials))
			swap_points = np.array(swap_points)
			swap_points = swap_points.T # transpose from (num swap ranges x trials) to (trials x num swap ranges)

			trials_goals = []
			for i in np.arange(args.trials):
				goal = None
				trial_goals = []
				for j in np.arange(num_trial_swaps+1):
					goal = sample_goal(goals, prev_goal=goal)
					trial_goals.append(goal)
				trials_goals.append(tuple(trial_goals))
				log.info('trial {0} goals: {1}'.format(i+1, tuple(trial_goals)))
			log.info('swap points: {0}'.format(swap_points))
		else:
			swap_points = None
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				log.info('trial {0} goal: {1}.'.format(i+1, goal))
				trials_goals.append((goal,))
		# evaluate fitness - each agent fitness is its average reward across trials
		agents = evo.get_all_individuals()
		if args.num_workers > 1:
			# create a clone of feature extractor and pass to method below.
			# this is a trick to solve the issue of pytorch raising an error about serialising
			# a non-leaf tensor that requires_grad. we need to pass the feature extractor to 
			# worker processes and this error occurs after the first generation.
			if fe_model is not None:
				fe_model_ = type(fe_model)()
				fe_model_.to(device)
				fe_model_.load_state_dict(fe_model.state_dict())
			else:
				fe_model_ = None
			pool.evaluate_agents(agents, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model_, device, obs_buffer)
			# free up memory
			fe_model_ = None
			gc.collect()
		else:
			evaluate_agents(agents, env, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model, device, obs_buffer, xml_goalelem=None)
		# log summary, model and generate network visualisation
		if args.save_interval > 0 and generation % args.save_interval == 0:
			top_agents = evo.get_n_fittest_individuals(n = args.top_n)
			top_agents_reward = [agent.get_reward() for agent in top_agents]
			best_fitness = top_agents_reward[0]
			log.info('top {0} agents: {1}'.format(args.top_n, np.array(top_agents_reward)))
			# write generation summary to logs (and screen)
			log.summary(generation, best_fitness, evo.get_worst_fitness(), evo.get_fitness_mean(),\
				evo.get_fitness_std())
			# save the model of the best agent and a visualisation of its phenotype/network.
			save_agent(top_agents[0], modeldir_path + 'gen-{0}-best.npy'.format(generation))
			if fe_model is not None:
				# save feature extractor
				state_dict_ = fe_model.state_dict()
				torch.save(state_dict_, modeldir_path + 'gen-{0}-femodel.pt'.format(generation))
			if visualise:
				# save agent visualisation
				top_agents[0].draw_network(visdir_path +'gen-{0}-best'.format(generation), prune=True)
		if generation == args.generations - 1:
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))
			break
		else:
			evo.selection()
			evo.produce_next_generation()
			if fe_model is not None:
				fe_trainer.train(epochs=20)
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))

	if pool is not None:
		pool.close()
	log.info('---Training over.---')
	log.close()

	return