Beispiel #1
0
    # Build the top-level MERL agent from the parsed arguments.
    ai = MERL(args)

    # Echo the run configuration, one label/value pair per line.
    print(
        'Running ', args.config.env_choice,
        'with config ', args.config.config,
        ' State_dim:', args.state_dim,
        'Action_dim', args.action_dim,
    )

    # Wall-clock reference point; the training loop divides frames by the
    # time elapsed since this moment to report FPS.
    time_start = time.time()

    ###### TRAINING LOOP ########
    # The range bound is effectively unreachable, so this runs virtually forever.
    for gen in range(1, 10000000000):

        # One training epoch; train() hands back three fitness results.
        popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

        # Progress line for this generation. The tuple is built left to
        # right, so the fields are evaluated in the order they are printed.
        progress = (
            'Ep:/Frames', gen, '/', ai.total_frames,
            'Popn stat:', mod.list_stat(popn_fits),
            'PG_stat:', mod.list_stat(pg_fits),
            'Test_trace:', [pprint(entry) for entry in ai.test_trace[-5:]],
            # Frames processed per wall-clock second since time_start.
            'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
            'Evo', args.scheme,
            'PS:', args.ps,
        )
        print(*progress)

        # Extended report on every 5th generation, framed by blank lines.
        if gen % 5 == 0:
            print()
            print('Test_stat:', mod.list_stat(test_fits),
                  'SAVETAG:  ', args.savetag)
            print('Weight Stats: min/max/average',
                  pprint(ai.test_bucket[0].get_norm_stats()))

            # With ps == 'trunk' the first entry of each agent's buffer is
            # measured; otherwise the buffer object itself is measured.
            if args.ps == 'trunk':
                buffer_lens = [agent.buffer[0].__len__() for agent in ai.agents]
            else:
                buffer_lens = [agent.buffer.__len__() for agent in ai.agents]
            print('Buffer Lens:', buffer_lens)
            print()
Beispiel #2
0
	# Only the 'hyper' environment triggers this import. Per the original
	# note, the main module needs access to Fast_Simulator (reason not recorded).
	if args.config.env_choice == 'hyper':
		from envs.hyper.PowerPlant_env import Fast_Simulator

	# Build the top-level MERL agent from the parsed arguments.
	ai = MERL(args)

	# Echo the run configuration, one label/value pair per line.
	print(
		'Running ', args.config.env_choice,
		'with config ', args.config.config,
		' State_dim:', args.state_dim,
		'Action_dim', args.action_dim,
	)

	# Wall-clock reference point; the training loop divides frames by the
	# time elapsed since this moment to report FPS.
	time_start = time.time()

	###### TRAINING LOOP ########
	# The range bound is effectively unreachable, so this runs virtually forever.
	for gen in range(1, 10000000000):

		# One training epoch; train() hands back three fitness results.
		popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

		# Progress line for this generation. The tuple is built left to
		# right, so the fields are evaluated in the order they are printed.
		progress = (
			'Ep:/Frames', gen, '/', ai.total_frames,
			'Popn stat:', mod.list_stat(popn_fits),
			'PG_stat:', mod.list_stat(pg_fits),
			'Test_trace:', [pprint(entry) for entry in ai.test_trace[-5:]],
			# Frames processed per wall-clock second since time_start.
			'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
			'Evo', args.scheme,
			'PS:', args.ps,
		)
		print(*progress)

		# Extended report on every 5th generation, framed by blank lines.
		if gen % 5 == 0:
			print()
			print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG:  ', args.savetag)
			print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))

			# With ps == 'trunk' the first entry of each agent's buffer is
			# measured; otherwise the buffer object itself is measured.
			if args.ps == 'trunk':
				buffer_lens = [agent.buffer[0].__len__() for agent in ai.agents]
			else:
				buffer_lens = [agent.buffer.__len__() for agent in ai.agents]
			print('Buffer Lens:', buffer_lens)
			print()

		# Every 10th generation, and only when rollouts are enabled
		# (rollout_size > 0), print the first agent's algo.q value.
		if gen % 10 == 0 and args.rollout_size > 0:
			print()
			print('Q', pprint(ai.agents[0].algo.q))
Beispiel #3
0
	# Only the 'hyper' environment triggers this import. Per the original
	# note, the main module needs access to Fast_Simulator (reason not recorded).
	if args.config.env_choice == 'hyper':
		from envs.hyper.PowerPlant_env import Fast_Simulator

	# Build the top-level MERL agent from the parsed arguments.
	ai = MERL(args)

	# Echo the run configuration, one label/value pair per line.
	print(
		'Running ', args.config.env_choice,
		'with config ', args.config.config,
		' Predator State_dim:', args.pred_state_dim,
		'Prey_state_dim', args.prey_state_dim,
		'Action_dim', args.action_dim,
	)

	# Wall-clock reference point; the training loop divides frames by the
	# time elapsed since this moment to report FPS.
	time_start = time.time()

	###### TRAINING LOOP ########
	# The range bound is effectively unreachable; the loop really ends via
	# the frames_bound check at the bottom.
	for gen in range(1, 10000000000):

		# One training epoch; train() hands back three fitness results plus
		# a prey score.
		popn_fits, pg_fits, test_fits, prey_score = ai.train(gen, test_tracker, prey_tracker)

		# Progress line for this generation. The tuple is built left to
		# right, so the fields are evaluated in the order they are printed.
		progress = (
			'Ep:/Frames', gen, '/', ai.total_frames,
			'Popn stat:', mod.list_stat(popn_fits),
			'PG_stat:', mod.list_stat(pg_fits),
			'Test_trace:', [pprint(entry) for entry in ai.test_trace[-5:]],
			# Frames processed per wall-clock second since time_start.
			'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
			'Evo', args.scheme,
			'Prey Score:', prey_score,
		)
		print(*progress)

		# Record the evolver's 'selects' result once past generation 2, and
		# only when a population is in use (popn_size > 0). The matching
		# elites-tracker update is disabled:
		#elites_tracker.update([ai.agents[0].evolver.rl_res['elites']], gen)
		# NOTE(review): the disabled line indexes ai.agents[0], while the live
		# call reads ai.agents.evolver directly - confirm ai.agents is a single
		# agent object here rather than a list.
		if gen > 2 and args.popn_size > 0:
			selects_tracker.update([ai.agents.evolver.rl_res['selects']], gen)

		# Stop training once the frame budget has been exceeded.
		if ai.total_frames > args.frames_bound:
			break

	###Kill all processes
	# Best-effort: send 'TERMINATE' over the first entry of ai.pg_task_pipes.
	# A failure here (e.g. a missing or already-closed pipe) must not abort
	# shutdown, so ordinary exceptions are ignored. Unlike the previous bare
	# `except:`, KeyboardInterrupt and SystemExit are no longer swallowed.
	try:
		ai.pg_task_pipes[0].send('TERMINATE')
	except Exception:
		pass