Example #1
class ModelInterface():
    def __init__(self):
        self.agent = Agent(state_size=STATE_SIZE,
                           action_size=ACTION_SIZE,
                           random_seed=10)
        self.agent.actor_local.load_state_dict(
            torch.load('model/checkpoint_actor.pth', map_location='cpu'))
        self.agent.critic_local.load_state_dict(
            torch.load('model/checkpoint_critic.pth', map_location='cpu'))
        self.agent.actor_local.eval()
        self.agent.critic_local.eval()

    def get_action_q(self, state, action):
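        # Embed the single (state, action) pair in a zero-padded batch of 128,
        # since the critic network apparently expects batched input; only row 0
        # carries real data, so only Q[0, 0] is returned below.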
        s = np.zeros((128, 6))
        s[0, :] = state

        a = np.zeros((128, 2))
        a[0, :] = action

        state = torch.Tensor(s)
        action = torch.Tensor(a)

        return self.agent.critic_local(state, action).detach().numpy()[0, 0]

    def get_action(self, state):
        return self.agent.act(state)
Example #2
 def __init__(self):
     self.agent = Agent(state_size=STATE_SIZE,
                        action_size=ACTION_SIZE,
                        random_seed=10)
     self.agent.actor_local.load_state_dict(
         torch.load('model/checkpoint_actor.pth', map_location='cpu'))
     self.agent.critic_local.load_state_dict(
         torch.load('model/checkpoint_critic.pth', map_location='cpu'))
     self.agent.actor_local.eval()
     self.agent.critic_local.eval()
Example #3
def train_sarsa(num_trials, num_episodes, lr, gamma, total_trials, alarm_consistency, epsilon):
    undiscounted_returns = np.zeros(shape=(total_trials, num_episodes))
    decayed_epsilon = epsilon
    environment = Environment.Environment(Constants.observations_file, Constants.log_crash_prior, alarm_consistency)
    agent = Agent.Agent(epsilon=epsilon)

    for trial in xrange(num_trials[0], num_trials[1]):
        agent.reset(epsilon)
        for episode in xrange(num_episodes):
            # if episode % Constants.epsilon_decay_episode:
            #     decayed_epsilon /= 2
            #     agent.set_epsilon(decayed_epsilon)

            print "Trial: %d, Episode: %d" % (trial, episode)
            environment.reset()
            current_state = environment.current_state
            current_action = agent.get_action(current_state)
            total_undiscounted_reward = 0
            while not environment.done:
                next_state, reward, done = environment.step(current_action)
                next_action = agent.get_action(next_state)
                # print current_state, current_action, reward, next_state, next_action
                agent.sarsa_update(current_state, current_action, reward, next_state, next_action, lr, gamma)
                current_state = next_state
                current_action = next_action
                total_undiscounted_reward += reward
            undiscounted_returns[trial][episode] = total_undiscounted_reward

    return undiscounted_returns
Example #4
def add_rep():

    agency = request.form.get("agency")
    agent = request.form.get("agent")
    email = request.form.get("email")
    phone = request.form.get("phone")

    user_id = session["current_user"]
    user = User.query.get(user_id)

    user_agencies = [agency.agency for agency in user.agencies]
    print(user_agencies)
    print(agency)
    if agency not in user_agencies:
        kwargs = dict(user_id=user_id, agency=agency)

        db.session.add(Agency(**kwargs))
        db.session.commit()
        print("agency added")

    agency_id = Agency.query.filter_by(agency=agency).first().agency_id

    kwargs = dict(user_id=user_id,
                  agency_id=agency_id,
                  agent=agent,
                  email=email,
                  phone=phone)

    db.session.add(Agent(**kwargs))
    db.session.commit()

    return redirect("/users/{}/my_contact".format(user_id))
Example #5
def load_agents():
	"""Load agents from seed data into database"""

	with open("seed_data/agents.txt") as agents: 
		for row in agents: 
			agent = row.rstrip().split("|")

			hidden = True if agent[6] == "True" else False

			kwargs = dict(
				agent_id=agent[0],
				user_id=agent[1],
				agency_id=agent[2],
				agent=agent[3],
				email=agent[4],
				phone=agent[5],
				hidden=hidden,
			)

			keys_to_remove = []

			for key in kwargs.keys(): 
				if kwargs[key] == "":
					keys_to_remove.append(key)

			for key in keys_to_remove:
				del kwargs[key]

			agent = Agent(**kwargs)

			db.session.add(agent)

	db.session.commit()
Example #6
def main():
    config = Config()
    print('*' * 20, config.envname, config.method, '*' * 20)
    env = config.env
    if config.seed:
        env.seed(config.seed)
        torch.manual_seed(config.seed)
    agent = Agent(env.observation_space.shape[0],
                  env.action_space.shape[0]).to(config.device)
    expert = load_policy(config.expert_path + config.envname + '.pkl')
    method = config.method

    if method == 'BC':
        agent = BehavioralCloning(config, agent, expert)
    elif method == 'DA':
        agent = DAgger(config, agent, expert)
    else:
        raise NotImplementedError(method)

    avrg_mean, avrg_std = Eval(config, expert)
    print('[expert] avrg_mean:{:.2f}  avrg_std:{:.2f}'.format(
        avrg_mean, avrg_std))

    avrg_mean, avrg_std = Eval(config, agent)
    print('[agent] avrg_mean:{:.2f}  avrg_std:{:.2f}'.format(
        avrg_mean, avrg_std))
Example #7
    def __init__(self, **kwargs):
        super(SupervisedTrainer, self).__init__(**kwargs)

        self.targets = tf.placeholder(tf.int32, [None, None])
        self.targets_length = tf.placeholder(tf.int32, [None])
        self.slot_targets = tf.placeholder(tf.int32, [None, None])
        self.slot_any_targets = tf.placeholder(tf.int32, [None, None])
        self.action_targets = tf.placeholder(tf.int32, [None])
        self.value_targets = tf.placeholder(tf.float32, [None])

        self._decoder_sampling_p = tf.placeholder(tf.float32, [])
        self._loss_mixture_weights = tf.placeholder(tf.float32, [None])
        self._dropout = tf.placeholder(tf.float32, [])

        self.agent = Agent(n_slots=self._n_slots,
                           n_actions=self._n_actions,
                           word_embeddings_shape=self._word_embeddings_shape,
                           hidden_size=self._hidden_size,
                           dropout=self._dropout,
                           decoder_helper_initializer=self._decoder_helper())

        self._loss()
        self._optimizer()
        self._metrics_writers()
        print('Dilatation rates:', self.DILATATION_RATES)
Example #8
def dashboard_summary():
    summary = {
        'agent_count': Agent.count(),
        'service_count': SInfo.count(),
        'alarm_count': 0,
        'sample_count': 0
    }
    return dump_json(summary)
Example #9
File: algo.py Project: vin136/lagom
    def _prepare(self, config):
        self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['train.N'], 0)
        self.env = VecClipAction(self.env)
        if config['env.standardize']:
            self.env = VecStandardize(self.env,
                                      use_obs=True,
                                      use_reward=False,
                                      clip_obs=10.0,
                                      clip_reward=10.0,
                                      gamma=0.99,
                                      eps=1e-08)
        self.env_spec = EnvSpec(self.env)

        self.device = torch.device('cpu')

        self.agent = Agent(config, self.env_spec, self.device)
Example #10
    def _build_agent(self):
        self.agent = Agent(
            word_embeddings_shape=[len(self._word_embeddings), 300],
            n_slots=len(self._slot_embeddings),
            n_actions=len(self._action_embbeddings),
            hidden_size=self._hidden_size)

        self.agent.saver.restore(self._sess, self._checkpoint)
Example #11
def get_agents():
    mhost = os.getenv('MASTER_HOST', None) or socket.gethostname()
    mport = _CONFIG['master']['server']['port']
    master_addr = '%s:%s' % (mhost, mport)
    agents = Agent.query(orderby='last_msg_at DESC')
    thresh = datetime.utcnow() - timedelta(minutes=5)
    for a in agents:
        a.status = 'active' if a.last_msg_at and a.last_msg_at >= thresh else 'inactive'
    return dump_json({'agents': agents, 'master_addr': master_addr})
Example #12
def main():
    data_size = 2000
    agent = Agent(1, envs).to(device)
    optimizer = optim.Adam(agent.parameters(), lr=0.0005, eps=1e-5)
    for epoch in range(data_size):
        rollouts = get_rollouts(trajectories, 64)
        losses = []
        for states, rewards, actions in rollouts:
            x = torch.from_numpy(states).float()
            y = torch.from_numpy(actions).long()
            loss = train(agent, x, y, optimizer, epochs=2)
            losses.append(loss)

        if epoch % 100 == 0:
            if not os.path.exists(f"models/"):
                os.makedirs(f"models/")
            torch.save(agent.state_dict(), f"models/agent-il.pt")
        print(f"epoch: {epoch} loss: {np.mean(losses)}")
Example #13
def main():
    args = get_args()
    args.critic_layers = literal_eval(args.critic_layers)
    args.actor_layers = literal_eval(args.actor_layers)

    save_dir = os.path.join('tests')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    state_transform = NormState(args.prosthetic)
    # state_transform = StateVelCentr(obstacles_mode='standard',
    #                                 exclude_centr=True,
    #                                 vel_states=[])
    env = RunEnv2(state_transform,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    env.change_model(args.modeldim, args.prosthetic, args.difficulty)
    num_actions = env.get_action_space_size()
    del env

    model_params = {
        'state_size': state_transform.state_size,
        'num_act': num_actions,
        'gamma': 0,
        'actor_layers': args.actor_layers,
        'critic_layers': args.critic_layers,
        'actor_lr': 0,
        'critic_lr': 0,
        'layer_norm': args.layer_norm
    }
    actor_fn, params_actor, params_crit, actor_lr, critic_lr = \
            build_model_test(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)

    actor.load(args.weights)

    weights = [p.get_value() for p in params_actor]

    global_step = 0
    test_agent(args, state_transform, args.episodes, actor, weights,
               global_step, save_dir)
Example #14
def submit_agent(args, model_params):

    ##########################################################

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
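    # Roll out the policy against the remote grader environment; a small
    # uniform noise term is added to each action before it is submitted.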
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.

        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + '  >>  step: ' + str(int(ii)) +
              '  >>  reward: ' + format(reward, '.2f') + '  \t' +
              str(int(reward_sum)) + '\t  >>  pelvis X Y Z: \t' +
              format(di['body_pos']['pelvis'][0], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][1], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break
    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    ##########################################################
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
Example #15
    def _build_agent(self):
        self.agent = Agent(
            word_embeddings_shape=[len(self._word_embeddings), 300],
            n_slots=len(self._slot_embeddings),
            n_actions=len(self._action_embbeddings),
            hidden_size=self._hidden_size,
            scope='target_agent',
            decoder_max_iter=50)

        self.agent.saver.restore(self._sess, self._checkpoint)
Example #16
def test_agent(args, testing, state_transform, num_test_episodes, model_params,
               weights, best_reward, updates, global_step, save_dir):
    env = RunEnv2(state_transform,
                  visualize=args.test,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    test_rewards = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    test_str ='global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.\
        format(global_step.value, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
        f.write(test_str + '\n')

    if mean_reward > best_reward.value or mean_reward > 30 * env.reward_mult:
        if mean_reward > best_reward.value:
            best_reward.value = mean_reward
        fname = os.path.join(
            save_dir, 'weights_updates_{}_reward_{:.2f}.pkl'.format(
                updates.value, mean_reward))
        actor.save(fname)
    testing.value = 0
Example #17
File: algo.py Project: vin136/lagom
class ESWorker(BaseESWorker):
    def prepare(self):
        self.agent = None

    def _prepare(self, config):
        self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['train.N'], 0)
        self.env = VecClipAction(self.env)
        if config['env.standardize']:
            self.env = VecStandardize(self.env,
                                      use_obs=True,
                                      use_reward=False,
                                      clip_obs=10.0,
                                      clip_reward=10.0,
                                      gamma=0.99,
                                      eps=1e-08)
        self.env_spec = EnvSpec(self.env)

        self.device = torch.device('cpu')

        self.agent = Agent(config, self.env_spec, self.device)

    def f(self, config, solution):
        if self.agent is None:
            self._prepare(config)

        solution = torch.from_numpy(np.asarray(solution)).float().to(
            self.device)
        assert solution.numel() == self.agent.num_params

        # Load solution params to agent
        self.agent.from_vec(solution)

        runner = EpisodeRunner(config, self.agent, self.env)
        with torch.no_grad():
            D = runner(self.env_spec.T)
        mean_return = D.numpy_rewards.sum(-1).mean()

        # ES does minimization, so use negative returns
        function_value = -mean_return

        return function_value
Example #18
def del_agent(aid):
    connectType = request.args.get('connect_type')
    u = request.args.get('username')
    p = request.args.get('password')
    agent = Agent.get_by_id(aid)
    logging.info('remove agent on %s@%s', u, agent)
    with NodeConnector(agent.host, u, p) as nc:
        nc.remove_agent()
        agent.remove()
    logging.info('agent removed on %s@%s finished.', u, agent)
    return dump_json(agent)
Example #19
def infection_events(env: simpy.Environment, infected: Agent, rng: np.random.Generator):
    print(f'@t={env.now} - {infected}->{State.INFECTED.name}')
    infected.state = State.INFECTED
    yield env.timeout(delay=rng.normal(loc=4.6, scale=0.3))

    print(f'@t={env.now} - {infected}->{State.INFECTIOUS.name}')
    infected.state = State.INFECTIOUS

    if rng.uniform() < p_asymptomatic:
        # Asymptomatic
        yield env.timeout(delay=rng.normal(loc=6.5, scale=0.4))
        print(f'@t={env.now} - {infected}->{State.REMOVED.name}')
        infected.state = State.REMOVED
    else:
        # Symptomatic
        yield env.timeout(delay=0.5)
        print(f'@t={env.now} - {infected}->{State.SYMPTOMATIC_INFECTIOUS.name}')
        infected.state = State.SYMPTOMATIC_INFECTIOUS

        yield env.timeout(delay=rng.normal(loc=6.0, scale=0.4))
        print(f'@t={env.now} - {infected}->{State.REMOVED.name}')
        infected.state = State.REMOVED
Example #20
def test_agent(args, testing, num_test_episodes, model_params, weights,
               best_reward, updates, global_step, save_dir):
    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards_all = []
    test_pelvis_X_all = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    # if args.weights is not None:
    #     actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, info = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards_all.append(test_reward)
        test_pelvis_X_all.append(info['pelvis_X'])
    test_reward_mean = np.mean(test_rewards_all)
    mean_pelvis_X = np.mean(test_pelvis_X_all)
    std_reward = np.std(test_rewards_all)

    test_str ='global step {}; test_reward_mean: {:.2f}, test_rewards_all: {}; mean_pelvis_Xmean: {:.2f}, test_pelvis_X_all: {} '.\
        format(global_step.value, float(test_reward_mean), test_rewards_all, float(mean_pelvis_X), test_pelvis_X_all)

    print(test_str)
    try:
        with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
            f.write(test_str + '\n')
    except Exception:
        print('#############################################')
        print('except  »  f.write(test_str )')
        print('#############################################')

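    # Checkpoint the actor whenever the mean test reward beats the best so far
    # or clears the 30 * env.reward_mult threshold.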
    if test_reward_mean > best_reward.value or test_reward_mean > 30 * env.reward_mult:
        if test_reward_mean > best_reward.value:
            best_reward.value = test_reward_mean
        fname = os.path.join(
            save_dir,
            'weights_updates_{}_reward_{:.1f}_pelvis_X_{:.1f}.pkl'.format(
                updates.value, test_reward_mean, mean_pelvis_X))
        actor.save(fname)
    testing.value = 0
Example #21
def main():
    """ Agent setting """
    agent = Agent()
    """ Learning """
    agent.learn()
    """ Test """
    agent.test()
    agent.sess.close()
Example #22
def load_agents():
    """ Load agents from agents.txt to database"""
    for row in open("seed_data/agents.txt"):
        row = row.strip()
        id, name, password, phone_number, email, tier = row.split("|")

        agent = Agent(id=id.strip(),
                      name=name.strip(),
                      password=password.strip(),
                      phone_number=phone_number.strip(),
                      email=email.strip(),
                      tier=int(tier.strip()))

        db.session.add(agent)

    db.session.commit()
Example #23
def main():

    env = gym.make("LunarLander-v2")
    agent = Agent(env)
    agent.load_state_dict(torch.load("./models/agent.pt"))
    agent.eval()

    obs = env.reset()
    done = False
    for i in range(10000):
        env.render()
        obs = torch.from_numpy(obs).float()
        action, _, _ = agent.get_action(obs)
        obs, rew, done, info = env.step(action.cpu().numpy())
        sleep(0.001)
        if done:
            obs = env.reset()
Example #24
def test_agent(args, num_test_episodes, model_params):
    env = RunEnv2(visualize=True,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards = []

    # train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    # actor_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            # state = np.concatenate((state,state,state))[:390]  # ndrw tmp
            action = actor.act(state)  # ndrw tmp
            # if args.prosthetic:
            #     action = np.zeros(19)  # ndrw tmp
            # else:
            #     action = np.zeros(22)  # ndrw tmp
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    global_step = 0
    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.\
        format(global_step, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open(os.path.join('test_report.log'), 'a') as f:
        f.write(test_str + '\n')
Example #25
def main():
    data_size = [1, 100, 100, 100, 100, 100, 100]
    for size in data_size:
        x = []
        y = []
        student = Agent(env)
        for episode in range(size):
            states, actions, rewards = generate_rollout(expert, env)
            x.extend(states)
            y.extend(actions)
        x = torch.stack(x)
        y = torch.cat(y)
        train(student, x, y)
        episode_rewards = []
        for _ in range(50):
            states, actions, rewards = generate_rollout(student, env)
            total_reward = np.sum(rewards)
            episode_rewards.append(total_reward)
        print(
            f'Number of training rollouts: {str(size)}: reward mean: {str(np.mean(episode_rewards))} \n'
        )
Example #26
"""
Created on Tue Oct 16 23:57:38 2018

@author: Abhista
"""

import gym
from model import Agent
from utils import plotLearning
from gym import wrappers
import numpy as np

if __name__ == '__main__':
    env = gym.make('SpaceInvaders-v0')
    brain = Agent(gamma=0.95,
                  epsilon=1.0,
                  alpha=0.003,
                  maxMemorySize=5000,
                  replace=None)
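    # Fill the replay memory with transitions gathered from a random policy
    # before any learning starts.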
    while brain.memCntr < brain.memSize:
        observation = env.reset()
        done = False
        while not done:
            # 0 no action, 1 fire, 2 move right, 3 move left, 4 move right fire, 5 move left fire
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            if done and info['ale.lives'] == 0:
                reward = -100
            brain.storeTransition(
                np.mean(observation[15:200, 30:125], axis=2), action, reward,
                np.mean(observation_[15:200, 30:125], axis=2))
            observation = observation_
Example #27
def train():
    print()
    print("RUNNING THE MINECRAFT SIMULATION")
    print()

    RENDER = False
    # RENDER = True
    LOAD_MODEL = False
    # LOAD_MODEL = True
    start_eps = 0.8

    WRAP = False
    GRID_SIZE = 7
    LOCAL_GRID_SIZE = 9  # Has to be an odd number (I think...)
    SEED = 1
    FOOD_COUNT = 1
    OBSTACLE_COUNT = 0
    # MAP_PATH = "./Maps/Grid{}/map2.txt".format(GRID_SIZE)
    MAP_PATH = None

    env = Environment(wrap=WRAP,
                      grid_size=GRID_SIZE,
                      rate=80,
                      max_time=30,
                      food_count=FOOD_COUNT,
                      obstacle_count=OBSTACLE_COUNT,
                      lava_count=0,
                      zombie_count=0,
                      action_space=5,
                      map_path=MAP_PATH)

    brain = Agent(gamma=0.99,
                  epsilon=start_eps,
                  alpha=0.01,
                  maxMemorySize=10000,
                  replace=10)

    if LOAD_MODEL:
        try:
            path = "./Models/Torch/my_model.pth"
            brain.load_model(path)
            print("Model loaded from path:", path)
            print()
            brain.EPSILON = 0.05
        except Exception:
            print('Could not load model')
            print('Press <ENTER> to continue with random initialisation')
            print()
            input()
            # quit()

    if RENDER: env.prerender()

    games_played = 0

    print("INITIALISING REPLAY MEMORY")

    while brain.memCntr < brain.memSize:
        observation, _ = env.reset()
        # print(observation)
        # observation = env.local_state_vector_3D()
        done = False

        if RENDER: env.render()  # Render first screen
        while not done:

            action = brain.chooseAction(observation)

            observation_, reward, done, info = env.step(action)
            # observation_ = env.local_state_vector_3D()
            # print(observation_)
            if done:
                # reward = -1
                games_played += 1
            brain.storeTransition(observation, action, reward, done,
                                  observation_)

            observation = observation_
            if RENDER: env.render()

    print("Done initialising replay memory. Played {} games".format(
        games_played))

    scores = []
    epsHistory = []
    numGames = 100000
    print_episode = 100
    batch_size = 16

    avg_score = 0
    avg_time = 0
    avg_loss = 0

    print()
    print("TRAINING MODEL")
    print()

    for i in range(numGames):
        epsHistory.append(brain.EPSILON)
        done = False
        observation, _ = env.reset()
        # observation = env.local_state_vector_3D()
        score = 0
        lastAction = 0

        if RENDER: env.render()  # Render first screen
        while not done:
            action = brain.chooseAction(observation)

            observation_, reward, done, info = env.step(action)

            # observation_ = env.local_state_vector_3D()
            # score += reward

            # print(observation_)

            brain.storeTransition(observation, action, reward, done,
                                  observation_)

            observation = observation_
            loss = brain.learn(batch_size)
            lastAction = action
            if RENDER: env.render()

        avg_score += info["score"]
        avg_time += info["time"]
        avg_loss += loss.item()

        if (i % print_episode == 0 and i != 0) or i == numGames - 1:
            print("Episode", i,
                  "\tavg time: {0:.3f}".format(avg_time / print_episode),
                  "\tavg score: {0:.3f}".format(avg_score / print_episode),
                  "\tavg loss: {0:.3f}".format(avg_loss / print_episode),
                  "\tepsilon: %.4f" % brain.EPSILON)
            brain.save_model("./Models/Torch/my_model{}.pth".format(i))
            avg_loss = 0
            avg_score = 0
            avg_time = 0

        # scores.append(score)
        # print("score:", score)

    brain.save_model("./Models/Torch/my_model.pth")
Example #28
    for k in range(5):
        done = False
        state = game.reset()
        while not done:
            game.render()
            #vid.capture_frame()
            if not arbit:
                action = agent.get_action(state, sample=False)
            else:
                action = random.choice(range(action_size))
            next_state, reward, done, _ = game.step(action)
            state = next_state
            time.sleep(0.015)


if __name__ == '__main__':
    #pip install gym[Box2D] -- maybe needed
    game = gym.make('LunarLander-v2')
    #vid = gym.wrappers.monitoring.video_recorder.VideoRecorder(game, path = './random.mp4')
    action_size = game.action_space.n
    print("Action size ", action_size)

    state_size = game.observation_space.shape[0]
    print("State size ", state_size)
    num_episodes = 2000
    agent = Agent(state_size, action_size, gamma=0.99, fc1=64, fc2=64)

    #train(game, num_episodes, agent)
    infer(game, None, agent, arbit=False)  # video recorder above is commented out, so pass None
    #game.close()
Example #29
def run_agent(model_params, weights, state_transform, data_queue, weights_queue,
              process, global_step, updates, best_reward, param_noise_prob, save_dir,
              max_steps=10000000):

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)

    env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
    random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2, size=env.noutput,
                                              sigma_min=0.05, n_steps_annealing=1e6)
    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32-2)
        state = env.reset(seed=seed, difficulty=2)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0
        
        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env.step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

            if terminal:
                break

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (states_np,
                np.asarray(actions).astype(np.float32),
                np.asarray(rewards).astype(np.float32),
                np.asarray(terminals),
                )
        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len {}, ' \
                     'reward: {:.2f}, original_reward {:.4f}; best reward: {:.2f} noise {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps,
                   total_reward, total_reward_original, best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)

        with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
            f.write(report_str + '\n')

        actor.set_actor_weights(weights)
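        # With probability param_noise_prob explore via parameter-space noise on
        # the next episode instead of additive action noise.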
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
Example #30
        p.flip_left_right(probability=0.5)
        p.zoom_random(probability=0.5, percentage_area=0.95)
        p.resize(probability=1.0, width=246, height=205)
        p.sample(n_augment)
        print("Data augmentation done on directory '%s'." % (com_anel_directory))

        print("Found %d items on directory '%s', starting data augmentation..." % (len(glob.glob(sem_anel_directory + "/*.bmp")), sem_anel_directory))
        p = Augmentor.Pipeline(sem_anel_directory, output_directory="augmented")
        p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5)
        p.flip_left_right(probability=0.5)
        p.zoom_random(probability=0.5, percentage_area=0.95)
        p.resize(probability=1.0, width=246, height=205)
        p.sample(n_augment)
        print("Data augmentation done on directory '%s'." % (sem_anel_directory))

        model = Agent()

        imgs = []
        labels = []

        for filename in glob.glob(com_anel_directory + "/augmented/*.bmp"):
            img = cv2.imread(filename)
            img = img/255.0
            imgs.append(img)
            labels.append(1.0)

        for filename in glob.glob(sem_anel_directory + "/augmented/*.bmp"):
            img = cv2.imread(filename)
            img = img/255.0
            imgs.append(img)
            labels.append(0.0)