Example #1
def get_exploration_agent(exploration_config, exploration_env):
    if exploration_config.get("type") == "learned":
        return dqn.DQNAgent.from_config(exploration_config, exploration_env)
    elif exploration_config.get("type") == "random":
        return policy.RandomPolicy(exploration_env.action_space)
    elif exploration_config.get("type") == "none":
        return policy.ConstantActionPolicy(grid.Action.end_episode)
    else:
        raise ValueError("Invalid exploration agent: {}".format(
            exploration_config.get("type")))
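A minimal usage sketch for the factory above; the config dict here is hypothetical, and exploration_env is assumed to be an environment object exposing an action_space, as the "random" branch requires.

# Hypothetical call of get_exploration_agent: "random" picks policy.RandomPolicy
# over the environment's action space, per the branch above.
exploration_config = {"type": "random"}
exploration_agent = get_exploration_agent(exploration_config, exploration_env)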
Example #2
    def __init__(self, alpha, epsilon, discount, environment):

        self.action_space = environment.action_space
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount = discount
        self.qvalues = np.zeros(
            (environment.state_space, environment.action_space), np.float32)
        self.policy = policy.RandomPolicy(environment.state_space,
                                          environment.action_space)
        self.explore_policy = self.policy
Example #3
    def __init__(self, alpha, discount, environment):

        self.alpha = alpha
        self.discount = discount

        ssp = environment.state_space
        asp = environment.action_space

        self.action_space = asp
        self.qvalues = np.zeros((ssp, asp), np.float32)

        self.optimal_policy = policy.RandomPolicy(ssp, asp)
        self.explore_policy = self.optimal_policy
        self.draw_policy = policy.GreedyPolicy(ssp, asp, self.qvalues)
Example #4
def episode_test() -> bool:
    racetrack_ = environment.track.GridWorld(environment.track.TRACK_1, rng)
    environment_ = environment.Environment(racetrack_, verbose=True)
    policy_ = policy.RandomPolicy(environment_, rng)
    agent_ = agent.Agent(environment_, policy_, verbose=True)
    episode_: agent.Episode = agent_.generate_episode()

    print()
    for t, rsa in enumerate(episode_.trajectory):
        print(
            f"t={t}\treward={rsa.reward}\tstate={rsa.state}\taction={rsa.action}"
        )

    return True
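A possible entry point for the test above; rng is a free variable in the snippet, so a seeded NumPy generator is assumed here.

import numpy as np

rng = np.random.default_rng(0)  # assumed: the module-level rng shared by track, policy, and agent

if __name__ == "__main__":
    assert episode_test()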
Example #5
  trace_path = "traces/sample_trace.csv"
  config = cfg.Config.from_files_and_bindings(["spec_llc.json"], [])
  env = environment.CacheReplacementEnv(config, trace_path, 0)

  if args.policy_type == "belady":
    replacement_policy = belady.BeladyPolicy(env)
  elif args.policy_type == "lru":
    replacement_policy = policy.LRU()
  elif args.policy_type == "s4lru":
    replacement_policy = s4lru.S4LRU(config.get("associativity"))
  elif args.policy_type == "belady_nearest_neighbors":
    train_env = environment.CacheReplacementEnv(config, trace_path, 0)
    replacement_policy = belady.BeladyNearestNeighborsPolicy(train_env)
  elif args.policy_type == "random":
    replacement_policy = policy.RandomPolicy(np.random.RandomState(0))
  else:
    raise ValueError(f"Unsupported policy type: {args.policy_type}")

  state = env.reset()
  total_reward = 0
  steps = 0
  with tqdm.tqdm() as pbar:
    while True:
      action = replacement_policy.action(state)
      state, reward, done, info = env.step(action)
      total_reward += reward
      steps += 1
      pbar.update(1)
      if done:
        break
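A small follow-up, using only names defined in the snippet, to report what the chosen replacement policy accumulated over the trace:

  # Aggregate statistics from the rollout loop above.
  print(f"steps: {steps}")
  print(f"total reward: {total_reward}")
  print(f"mean reward per step: {total_reward / max(steps, 1):.4f}")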
Example #6
import game
import policy
import time

iteration = 1
numberWin = [0, 0]
numSteps = [[0 for i in range(iteration)] for i in range(0, 2)]
#boardSize = 6

for i in range(0, iteration):
    newGame = game.Gomoku(1)
    # print "start a new Gomoku game with board size %dx%d"%(boardSize, boardSize)

    #baselinePolicy = game.BaselinePolicy()
    randomPolicy = policy.RandomPolicy()
    minimaxPolicy = policy.MinimaxPolicy()
    time_to_move = [[], []]
    while (newGame.isEnd() < 0):
        nextPlayer = newGame.nextPlayer
        start = time.time()
        if (nextPlayer == 1):
            #action = randomPolicy.getNextAction(newGame)
            action = minimaxPolicy.getNextAction(newGame)
            # print "player 1 selects ", action
        else:
            action = randomPolicy.getNextAction(newGame)
            #action = baselinePolicy.getNextAction(newGame)
            # print "player 2 selects ", action
        # print time.time() - start
        time_to_move[nextPlayer - 1].append(time.time() - start)
        # print "player %d places on (%d, %d)"%(nextPlayer, action[0], action[1])
Example #7
n_population = 10000

# symptom names for easy reference
from auxilliary import symptom_names

# Create the underlying population
print("Generating population")
population = simulator.Population(n_genes, n_vaccines, n_treatments)
X = population.generate(n_population)

# Make sure that your policy appropriately filters the population if necessary. X here is just a random sample of n_population individuals

# Generate vaccination results

print("Vaccination")
vaccine_policy = policy.RandomPolicy(n_vaccines, list(range(
    -1, n_vaccines)))  # make sure to add -1 for 'no vaccine'

print("With a for loop")
# The simplest way to work is to go through every individual in the population
for t in range(n_population):
    a_t = vaccine_policy.get_action(X[t])
    # Then you can obtain results for everybody
    y_t = population.vaccinate([t], a_t)
    # Feed the results back in your policy. This allows you to fit the
    # statistical model you have.
    vaccine_policy.observe(X[t], a_t, y_t)

print("Vaccinate'em all")
# Here you can get an action for everybody in the population
A = vaccine_policy.get_action(X)
# Then you can obtain results for everybody
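The snippet cuts off here; a plausible continuation, assuming population.vaccinate and vaccine_policy.observe accept whole index lists and action/outcome arrays exactly as in the per-individual loop above:

# Hypothetical batch version of the loop above (signatures assumed).
Y = population.vaccinate(list(range(n_population)), A)
vaccine_policy.observe(X, A, Y)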
Example #8
    def __init__(self, config=None):

        if config is None:
            config = {}
        self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
        self.action_size = self.env.action_space.n

        self.to_vis = config.get('visualize', False)
        self.verbose = config.get('verbose', True)
        self.backup = config.get('backup', 25)
        self.episodes = config.get('episodes', 300)

        self.depth = config.get('depth', 4)
        self.state_size = config.get('space', (84, 84))
        self.model = None
        self._target_model = None

        self.prioritized = config.get('prioritized', False)

        if self.prioritized:
            self.memory = PrioritizedMemory(
                max_len=config.get('mem_size', 100000))
        else:
            self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

        if config.get('duel', False):
            self.model = self._duel_conv()
        else:
            self.model = self._conv()

        self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

        if config.get('target', True):
            self._target_model = clone_model(self.model)
            self._target_model.set_weights(self.model.get_weights())
            self._time = 0
            self.update_time = config.get('target_update', 1000)

        self.env._max_episode_steps = None
        self.batch_size = config.get('batch', 32 * 3)
        self.to_observe = config.get('to_observe', 10000)

        self.log_dir = config['log_dir']
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        plot_model(self.model,
                   to_file=os.path.join(self.log_dir, 'model.png'),
                   show_shapes=True)

        attr = {
            'batch size': self.batch_size,
            'to observe': self.to_observe,
            'depth': self.depth
        }

        self.results = {'info': attr}

        load_prev = config.get('load', False)

        self.gamma = None
        pol = None

        if 'pol' in config:
            if config['pol'] == 'random':
                pol = policy.RandomPolicy()
            elif config['pol'] == 'eps':
                pol = policy.EpsPolicy(config.get('pol_eps', 0.1))

        self.pol = pol

        if load_prev:
            path = sorted([
                int(x) for x in os.listdir(self.log_dir)
                if os.path.isdir(os.path.join(self.log_dir, x))
            ])
            if len(path) != 0:
                load_prev = self.load(os.path.join(self.log_dir,
                                                   str(path[-1])))

        if self.pol is None:
            self.pol = policy.AnnealedPolicy(
                inner_policy=policy.EpsPolicy(1.0,
                                              other_pol=policy.GreedyPolicy()),
                attr='eps',
                value_max=1.0,
                value_min=config.get('ex_min', 0.02),
                value_test=0.5,
                nb_steps=config.get('ex_steps', 100000))
        if self.gamma is None:
            self.gamma = policy.EpsPolicy(float(config.get('gamma',
                                                           0.99))).get_value
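A minimal config sketch for this constructor; every key mirrors a config.get() call in the snippet, 'log_dir' is the only key read without a default, and the class name is assumed since only __init__ is shown.

# Hypothetical instantiation (class name assumed).
config = {
    'game': 'PongNoFrameskip-v4',
    'log_dir': 'logs/pong_dqn',   # only key accessed without a default
    'pol': 'eps',
    'pol_eps': 0.05,
    'duel': False,
    'prioritized': False,
}
agent = Agent(config)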
Example #9
# main
if __name__ == "__main__":
    import pandas
    import policy
    n_symptoms = 10
    n_genes = 128
    n_vaccines = 3
    n_treatments = 4
    pop = Population(n_genes, n_vaccines, n_treatments)
    n_observations = 1000
    X_observation = pop.generate(n_observations)
    pandas.DataFrame(X_observation).to_csv('observation_features.csv',
                                           header=False,
                                           index=False)
    n_treated = 1000
    X_treatment = pop.generate(n_treated)
    X_treatment = X_treatment[X_treatment[:, 1] == 1]
    print("Generating treatment outcomes")
    a, y = pop.treatment(
        X_treatment,
        policy.RandomPolicy(n_treatments, list(range(n_treatments))))
    pandas.DataFrame(X_treatment).to_csv('treatment_features.csv',
                                         header=False,
                                         index=False)
    pandas.DataFrame(a).to_csv('treatment_actions.csv',
                               header=False,
                               index=False)
    pandas.DataFrame(y).to_csv('treatment_outcomes.csv',
                               header=False,
                               index=False)
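A quick sanity check on the files written above; because they were saved with header=False and index=False, they are read back with header=None.

# Hypothetical consistency check: one action row and one outcome row per treated individual.
import pandas
features = pandas.read_csv('treatment_features.csv', header=None)
actions = pandas.read_csv('treatment_actions.csv', header=None)
outcomes = pandas.read_csv('treatment_outcomes.csv', header=None)
assert len(features) == len(actions) == len(outcomes)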