parser.add_argument('--rewards-net',
                    help='filename containing a trained rewards network')
parser.add_argument('--walk-len', default=1000, type=int,
                    help='number of steps to take in the world')
args = parser.parse_args()

# An instance type is mandatory: without one, show usage and exit with status 1.
if not args.type:
    parser.print_help()
    sys.exit(1)

# 'nnet': walk through a world described by two trained networks, one giving
# the next state and one giving the reward, printing one line per step.
if args.type == 'nnet':
    # Both network files are required for this instance type.
    if not args.transitions_net or not args.rewards_net:
        parser.print_help()
        sys.exit(1)

    (tnet, tset) = io.read_neural_net(args.transitions_net)
    (rnet, rset) = io.read_neural_net(args.rewards_net)

    # presumably the transition net's input is the state plus a single action
    # value, hence the "- 1" -- TODO confirm against the network layout
    state_dim = tnet['in'].dim - 1
    reward_dim = rnet['out'].dim

    # Floor division is required here: --walk-len is an int, and true division
    # would hand range() a float, which raises TypeError on Python 3.
    # With an odd --walk-len the two halves together take walk_len - 1 steps.
    half_walk = args.walk_len // 2

    # The walk starts from the all-zeros state.
    state = np.zeros(state_dim)

    # First half: action is npr.random() scaled into 0.5 .. 1.0
    # (assuming npr.random() is uniform on [0, 1) -- confirm).
    for i in range(half_walk):
        action = npr.random() * 0.5 + 0.5
        new_state = net.activation(tnet, state, action)
        reward = net.activation(rnet, state, action)
        # Output row layout: state, action, next state, reward (space separated).
        entry = state.tolist() + [action] + new_state.tolist() + reward.tolist()
        print(' '.join(str(x) for x in entry))
        state = new_state

    # Second half: same walk, but the action is shifted into -0.5 .. 0.0.
    # NOTE(review): only the first two statements of this loop are visible in
    # the reviewed section; the rest presumably mirrors the loop above -- confirm.
    for i in range(half_walk):
        action = npr.random() * 0.5 - 0.5
        new_state = net.activation(tnet, state, action)
# NOTE(review): line breaks and indentation appear to have been lost in the
# line below; as written, everything after its leading '#' is one comment.
#
# What the collapsed statements show, in order:
#   * Tail of a branch (its header is not visible here) that builds a maze with
#     grd.make_multimaze(rows, cols, tasks), derives goal states with
#     grd.maze_goal_states(maze, tasks, rmeans, cov), writes both with
#     io.write_maze_instance, then prints a '# type=..., rows=..., ...' line.
#   * args.type == 'perturbation': requires args.baseline (otherwise prints help
#     and exits with status 1); reads a neural net and its training set, passes
#     the net to fuzz_neural_net with args.fuzz_frac and args.fuzz_scale, and
#     writes the result to 'fuzzed.net' and 'train_log_fuzzed.dat'.
#   * args.type == 'discrete': requires args.graph_type (otherwise prints help
#     and exits with status 1); starts building `params` for the 'fern' graph
#     type. The dict literal continues past the end of this section.
#
# NOTE(review): `(net, trainset) = io.read_neural_net(...)` rebinds `net`, so the
# following `net.fuzz_neural_net(net, ...)` is looked up on the object that was
# just read. If `net` is meant to be a module here (as `net.activation(...)`
# elsewhere in this file suggests), the rebinding shadows it -- confirm, and
# consider a different local name for the loaded network.
# TODO: incorporate correlated rewards somehow maze = grd.make_multimaze(args.rows, args.cols, args.tasks) goals = grd.maze_goal_states(maze, args.tasks, args.rmeans, cov) io.write_maze_instance(maze, goals) print('# type={}, rows={}, cols={}, correlation={}, stdev={}'. format(args.type, args.rows, args.cols, args.correlation.tolist(), args.stdev.tolist())) # perturbation of existing instances elif args.type == 'perturbation': if not args.baseline: parser.print_help() sys.exit(1) else: # TODO: handle other kinds of models (net, trainset) = io.read_neural_net(args.baseline) net2 = net.fuzz_neural_net(net, args.fuzz_frac, args.fuzz_scale) io.write_neural_net(net2, trainset, 'fuzzed.net') io.write_train_log(net2, trainset, 'train_log_fuzzed.dat') # for discrete instances, we need to generate transition graphs and reward structures # and write them out directly elif args.type == 'discrete': if not args.graph_type: parser.print_help() sys.exit(1) params = None if args.graph_type == 'fern': params = {'frond_probability': args.frond_probability,