Exemple #1
0
	# Last two command-line options, then parse the command line.
	parser.add_argument('--rewards-net',                     help='filename containing a trained rewards network')
	parser.add_argument('--walk-len',         default=1000,  help='number of steps to take in the world', type=int)
	args = parser.parse_args()


	# A run type (args.type) is required: without one, print usage and exit with status 1.
	if not args.type:
		parser.print_help()
		sys.exit(1)

		
	if args.type == 'nnet':
		# Walk through a learned model: every step feeds the current state and a
		# random action to the transitions and rewards networks, then prints one
		# line of space-separated values (state, action, new state, reward).

		# Both network files are required in this mode.
		if not args.transitions_net or not args.rewards_net:
			parser.print_help()
			sys.exit(1)

		(tnet, tset) = io.read_neural_net(args.transitions_net)
		(rnet, rset) = io.read_neural_net(args.rewards_net)
		# presumably the transitions net's input layer holds the state plus the
		# single action value, hence the -1 -- TODO confirm against the network layout
		state_dim = tnet['in'].dim - 1
		reward_dim = rnet['out'].dim

		# The walk starts from the all-zeros state.
		state = np.zeros(state_dim)
		# Floor division: range() requires an int, and walk_len / 2 is a float on
		# Python 3 (TypeError). An odd walk_len therefore drops one step in total.
		# First half of the walk: actions are random() * 0.5 + 0.5.
		for i in range(args.walk_len // 2):
			action = npr.random() * 0.5 + 0.5
			new_state = net.activation(tnet, state, action)
			reward = net.activation(rnet, state, action)
			entry = state.tolist() + [action] + new_state.tolist() + reward.tolist()
			print('{}'.format(' '.join([str(x) for x in entry])))
			state = new_state
		# Second half of the walk: actions are random() * 0.5 - 0.5.
		for i in range(args.walk_len // 2):
			action = npr.random() * 0.5 - 0.5
			new_state = net.activation(tnet, state, action)
Exemple #2
0
        # TODO: incorporate correlated rewards somehow
        # Build the maze, derive its goal states, and hand both to the instance writer.
        # NOTE(review): `cov` is defined above this excerpt; presumably the reward
        # covariance -- confirm.
        maze = grd.make_multimaze(args.rows, args.cols, args.tasks)
        goals = grd.maze_goal_states(maze, args.tasks, args.rmeans, cov)
        io.write_maze_instance(maze, goals)
        # Print a '#'-prefixed line recording the parameters used. correlation and
        # stdev must be objects with a tolist() method (e.g. numpy arrays).
        print('# type={}, rows={}, cols={}, correlation={}, stdev={}'.
              format(args.type, args.rows, args.cols, args.correlation.tolist(), args.stdev.tolist()))


    # perturbation of existing instances
    elif args.type == 'perturbation':
        # A baseline model file is mandatory in this mode; otherwise print usage and exit.
        if not args.baseline:
            parser.print_help()
            sys.exit(1)
        else:
            # TODO: handle other kinds of models
            # Load the baseline network, fuzz it with the requested fraction and
            # scale, then write the fuzzed network and its training log to the
            # fixed filenames 'fuzzed.net' and 'train_log_fuzzed.dat'.
            # NOTE(review): the next line rebinds `net` to the loaded network. If
            # `net` is also the module that provides fuzz_neural_net, the call
            # below is resolved on the network object instead of the module --
            # confirm, and rename the local if so.
            (net, trainset) = io.read_neural_net(args.baseline)
            net2 = net.fuzz_neural_net(net, args.fuzz_frac, args.fuzz_scale)
            io.write_neural_net(net2, trainset, 'fuzzed.net')
            io.write_train_log(net2, trainset, 'train_log_fuzzed.dat')


    # for discrete instances, we need to generate transition graphs and reward structures
    # and write them out directly
    elif args.type == 'discrete':
        if not args.graph_type:
            parser.print_help()
            sys.exit(1)

        params = None
        if args.graph_type == 'fern':
            params = {'frond_probability': args.frond_probability,